From 0d6f55403a47b317c0d9511a80545c19ab0ef52f Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 9 Jul 2020 15:29:41 +0206 Subject: crash: add VMCOREINFO macro to define offset in a struct declared by typedef The existing macro VMCOREINFO_OFFSET() can't be used for structures declared via typedef because "struct" is not part of type definition. Create another macro for this purpose. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Baoquan He Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200709132344.760-2-john.ogness@linutronix.de --- include/linux/crash_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 525510a9f965..43b51c9df571 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -53,6 +53,9 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_OFFSET(name, field) \ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_TYPE_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(name, field)) #define VMCOREINFO_LENGTH(name, value) \ vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) #define VMCOREINFO_NUMBER(name) \ -- cgit v1.2.3 From 7d8365771ffb0edc336f2cd45e96ef8214a83dca Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Fri, 3 Jul 2020 16:29:38 +0200 Subject: moduleparams: Add hexint type parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For bitmasks printing values in hex is more convenient. Prefix with `0x` to make it clear, that it’s a hex value, and pad it out. Using the helper for `amdgpu.ppfeaturemask`, it will look like below. Before: $ more /sys/module/amdgpu/parameters/ppfeaturemask 4294950911 After: $ more /sys/module/amdgpu/parameters/ppfeaturemask 0xffffbfff Cc: linux-kernel@vger.kernel.org Cc: amd-gfx@lists.freedesktop.org Signed-off-by: Paul Menzel Acked-by: Linus Torvalds Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/374726/ --- include/linux/moduleparam.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3ef917ff0964..cff7261e98bb 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -118,7 +118,7 @@ struct kparam_array * you can create your own by defining those variables. * * Standard types are: - * byte, short, ushort, int, uint, long, ulong + * byte, hexint, short, ushort, int, uint, long, ulong * charp: a character pointer * bool: a bool, values 0/1, y/n, Y/N. * invbool: the above, only sense-reversed (N = true). @@ -448,6 +448,11 @@ extern int param_set_ullong(const char *val, const struct kernel_param *kp); extern int param_get_ullong(char *buffer, const struct kernel_param *kp); #define param_check_ullong(name, p) __param_check(name, p, unsigned long long) +extern const struct kernel_param_ops param_ops_hexint; +extern int param_set_hexint(const char *val, const struct kernel_param *kp); +extern int param_get_hexint(char *buffer, const struct kernel_param *kp); +#define param_check_hexint(name, p) param_check_uint(name, p) + extern const struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); -- cgit v1.2.3 From 2d05f56af8f52d52dc614ddf4d51c00ea5afb67f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 29 Jul 2020 15:41:44 +0200 Subject: fbdev: Remove trailing whitespace Removes trailing whitespaces in several places. Signed-off-by: Thomas Zimmermann Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200729134148.6855-2-tzimmermann@suse.de --- include/linux/fb.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 2b530e6d86e4..714187bc13ac 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -124,7 +124,7 @@ struct fb_cursor_user { * Register/unregister for framebuffer events */ -/* The resolution of the passed in fb_info about to change */ +/* The resolution of the passed in fb_info about to change */ #define FB_EVENT_MODE_CHANGE 0x01 #ifdef CONFIG_GUMSTIX_AM200EPD @@ -459,12 +459,12 @@ struct fb_info { #if IS_ENABLED(CONFIG_FB_BACKLIGHT) /* assigned backlight device */ - /* set before framebuffer registration, + /* set before framebuffer registration, remove after unregister */ struct backlight_device *bl_dev; /* Backlight level curve */ - struct mutex bl_curve_mutex; + struct mutex bl_curve_mutex; u8 bl_curve[FB_BACKLIGHT_LEVELS]; #endif #ifdef CONFIG_FB_DEFERRED_IO @@ -483,8 +483,8 @@ struct fb_info { char __iomem *screen_base; /* Virtual address */ char *screen_buffer; }; - unsigned long screen_size; /* Amount of ioremapped VRAM or 0 */ - void *pseudo_palette; /* Fake palette of 16 colors */ + unsigned long screen_size; /* Amount of ioremapped VRAM or 0 */ + void *pseudo_palette; /* Fake palette of 16 colors */ #define FBINFO_STATE_RUNNING 0 #define FBINFO_STATE_SUSPENDED 1 u32 state; /* Hardware state i.e suspend */ @@ -587,11 +587,11 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { * `Generic' versions of the frame buffer device operations */ -extern int fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var); -extern int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var); +extern int fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var); +extern int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var); extern int fb_blank(struct fb_info *info, int blank); -extern void cfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); -extern void cfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); +extern void cfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); +extern void cfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); extern void cfb_imageblit(struct fb_info *info, const struct fb_image *image); /* * Drawing operations where framebuffer is in system RAM -- cgit v1.2.3 From e544ea57ac0734bca752eb2d8635fecbe932c356 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 31 Jul 2020 16:07:46 -0700 Subject: x86/boot/compressed: Force hidden visibility for all symbol references Eliminate all GOT entries in the decompressor binary, by forcing hidden visibility for all symbol references, which informs the compiler that such references will be resolved at link time without the need for allocating GOT entries. To ensure that no GOT entries will creep back in, add an assertion to the decompressor linker script that will fire if the .got section has a non-zero size. [Arvind: move hidden.h to include/linux instead of making a copy] Signed-off-by: Ard Biesheuvel Signed-off-by: Arvind Sankar Signed-off-by: Kees Cook Signed-off-by: Ingo Molnar Tested-by: Nick Desaulniers Tested-by: Sedat Dilek Reviewed-by: Kees Cook Acked-by: Arvind Sankar Link: https://lore.kernel.org/r/20200731230820.1742553-3-keescook@chromium.org --- include/linux/hidden.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/hidden.h (limited to 'include/linux') diff --git a/include/linux/hidden.h b/include/linux/hidden.h new file mode 100644 index 000000000000..49a17b6b5962 --- /dev/null +++ b/include/linux/hidden.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * When building position independent code with GCC using the -fPIC option, + * (or even the -fPIE one on older versions), it will assume that we are + * building a dynamic object (either a shared library or an executable) that + * may have symbol references that can only be resolved at load time. For a + * variety of reasons (ELF symbol preemption, the CoW footprint of the section + * that is modified by the loader), this results in all references to symbols + * with external linkage to go via entries in the Global Offset Table (GOT), + * which carries absolute addresses which need to be fixed up when the + * executable image is loaded at an offset which is different from its link + * time offset. + * + * Fortunately, there is a way to inform the compiler that such symbol + * references will be satisfied at link time rather than at load time, by + * giving them 'hidden' visibility. + */ + +#pragma GCC visibility push(hidden) -- cgit v1.2.3 From e8ee6c8cb61b676f1a2d6b942329e98224bd8ee9 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 31 Jul 2020 23:08:26 +0300 Subject: dmaengine: dw: Add DMA-channels mask cell support DW DMA IP-core provides a way to synthesize the DMA controller with channels having different parameters like maximum burst-length, multi-block support, maximum data width, etc. Those parameters both explicitly and implicitly affect the channels performance. Since DMA slave devices might be very demanding to the DMA performance, let's provide a functionality for the slaves to be assigned with DW DMA channels, which performance according to the platform engineer fulfill their requirements. After this patch is applied it can be done by passing the mask of suitable DMA-channels either directly in the dw_dma_slave structure instance or as a fifth cell of the DMA DT-property. If mask is zero or not provided, then there is no limitation on the channels allocation. For instance Baikal-T1 SoC is equipped with a DW DMAC engine, which first two channels are synthesized with max burst length of 16, while the rest of the channels have been created with max-burst-len=4. It would seem that the first two channels must be faster than the others and should be more preferable for the time-critical DMA slave devices. In practice it turned out that the situation is quite the opposite. The channels with max-burst-len=4 demonstrated a better performance than the channels with max-burst-len=16 even when they both had been initialized with the same settings. The performance drop of the first two DMA-channels made them unsuitable for the DW APB SSI slave device. No matter what settings they are configured with, full-duplex SPI transfers occasionally experience the Rx FIFO overflow. It means that the DMA-engine doesn't keep up with incoming data pace even though the SPI-bus is enabled with speed of 25MHz while the DW DMA controller is clocked with 50MHz signal. There is no such problem has been noticed for the channels synthesized with max-burst-len=4. Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20200731200826.9292-6-Sergey.Semin@baikalelectronics.ru Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-dw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index fbbeb2f6189b..b34a094b2258 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -26,6 +26,7 @@ struct device; * @dst_id: dst request line * @m_master: memory master for transfers on allocated channel * @p_master: peripheral master for transfers on allocated channel + * @channels: mask of the channels permitted for allocation (zero value means any) * @hs_polarity:set active low polarity of handshake interface */ struct dw_dma_slave { @@ -34,6 +35,7 @@ struct dw_dma_slave { u8 dst_id; u8 m_master; u8 p_master; + u8 channels; bool hs_polarity; }; -- cgit v1.2.3 From d92aabca4df182763cd541d342f2d55f8c0a827c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 25 Jul 2020 21:15:20 -0700 Subject: firmware: bcm47xx_sprom: Fix -Wmissing-prototypes warnings bcm47xx_sprom.h did not include a prototype for bcm47xx_fill_sprom() therefore add one, and make sure we do include that header to fix -Wmissing-prototypes warnings. Reported-by: kernel test robot Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- include/linux/bcm47xx_sprom.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcm47xx_sprom.h b/include/linux/bcm47xx_sprom.h index b0f4424f34fc..f8254fd53e15 100644 --- a/include/linux/bcm47xx_sprom.h +++ b/include/linux/bcm47xx_sprom.h @@ -9,9 +9,19 @@ #include #include +struct ssb_sprom; + #ifdef CONFIG_BCM47XX_SPROM +void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix, + bool fallback); int bcm47xx_sprom_register_fallbacks(void); #else +static inline void bcm47xx_fill_sprom(struct ssb_sprom *sprom, + const char *prefix, + bool fallback) +{ +} + static inline int bcm47xx_sprom_register_fallbacks(void) { return -ENOTSUPP; -- cgit v1.2.3 From c2fe8ebb332eefb3d0543b248e28dd2992c04793 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Aug 2020 21:26:42 +0200 Subject: clk: samsung: s3c64xx: declare s3c64xx_clk_init() in shared header The s3c64xx_clk_init() is defined and used by the clk-s3c64xx driver and also used in the mach-s3c64xx machine code. Move the declaration to a header to fix W=1 build warning: drivers/clk/samsung/clk-s3c64xx.c:391:13: warning: no previous prototype for 's3c64xx_clk_init' [-Wmissing-prototypes] 391 | void __init s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, Signed-off-by: Krzysztof Kozlowski Reviewed-by: Tomasz Figa Acked-by: Chanwoo Choi Reviewed-by: Stephen Boyd --- include/linux/clk/samsung.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/linux/clk/samsung.h (limited to 'include/linux') diff --git a/include/linux/clk/samsung.h b/include/linux/clk/samsung.h new file mode 100644 index 000000000000..7a0824b22eed --- /dev/null +++ b/include/linux/clk/samsung.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020 Krzysztof Kozlowski + */ + +#ifndef __LINUX_CLK_SAMSUNG_H_ +#define __LINUX_CLK_SAMSUNG_H_ + +#include + +struct device_node; + +#ifdef CONFIG_ARCH_S3C64XX +void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, + unsigned long xusbxti_f, bool s3c6400, + void __iomem *base); +#else +static inline void s3c64xx_clk_init(struct device_node *np, + unsigned long xtal_f, + unsigned long xusbxti_f, + bool s3c6400, void __iomem *base) { } +#endif /* CONFIG_ARCH_S3C64XX */ + +#endif /* __LINUX_CLK_SAMSUNG_H_ */ -- cgit v1.2.3 From 16b17fcf77f2145b98cabbca6bfe6ea13c90bb08 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Aug 2020 21:26:43 +0200 Subject: clk: samsung: s3c24xx: declare s3c24xx_common_clk_init() in shared header The s3c2410_common_clk_init() and others are defined and used by the clk-s3c24xx driver and also used in the mach-s3c24xx machine code. Move the declaration to a header to fix W=1 build warnings: drivers/clk/samsung/clk-s3c2410.c:320:13: warning: no previous prototype for 's3c2410_common_clk_init' [-Wmissing-prototypes] 320 | void __init s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, drivers/clk/samsung/clk-s3c2412.c:205:13: warning: no previous prototype for 's3c2412_common_clk_init' [-Wmissing-prototypes] 205 | void __init s3c2412_common_clk_init(struct device_node *np, unsigned long xti_f, drivers/clk/samsung/clk-s3c2443.c:341:13: warning: no previous prototype for 's3c2443_common_clk_init' [-Wmissing-prototypes] 341 | void __init s3c2443_common_clk_init(struct device_node *np, unsigned long xti_f, Signed-off-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Reviewed-by: Stephen Boyd --- include/linux/clk/samsung.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/samsung.h b/include/linux/clk/samsung.h index 7a0824b22eed..79097e365f7f 100644 --- a/include/linux/clk/samsung.h +++ b/include/linux/clk/samsung.h @@ -21,4 +21,36 @@ static inline void s3c64xx_clk_init(struct device_node *np, bool s3c6400, void __iomem *base) { } #endif /* CONFIG_ARCH_S3C64XX */ +#ifdef CONFIG_S3C2410_COMMON_CLK +void s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, + int current_soc, + void __iomem *reg_base); +#else +static inline void s3c2410_common_clk_init(struct device_node *np, + unsigned long xti_f, + int current_soc, + void __iomem *reg_base) { } +#endif /* CONFIG_S3C2410_COMMON_CLK */ + +#ifdef CONFIG_S3C2412_COMMON_CLK +void s3c2412_common_clk_init(struct device_node *np, unsigned long xti_f, + unsigned long ext_f, void __iomem *reg_base); +#else +static inline void s3c2412_common_clk_init(struct device_node *np, + unsigned long xti_f, + unsigned long ext_f, + void __iomem *reg_base) { } +#endif /* CONFIG_S3C2412_COMMON_CLK */ + +#ifdef CONFIG_S3C2443_COMMON_CLK +void s3c2443_common_clk_init(struct device_node *np, unsigned long xti_f, + int current_soc, + void __iomem *reg_base); +#else +static inline void s3c2443_common_clk_init(struct device_node *np, + unsigned long xti_f, + int current_soc, + void __iomem *reg_base) { } +#endif /* CONFIG_S3C2443_COMMON_CLK */ + #endif /* __LINUX_CLK_SAMSUNG_H_ */ -- cgit v1.2.3 From a0308938ec81cd0dca9d75833ec0dd1b8708917e Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 18 Aug 2020 16:13:41 +0900 Subject: virtio: add dma-buf support for exported objects This change adds a new flavor of dma-bufs that can be used by virtio drivers to share exported objects. A virtio dma-buf can be queried by virtio drivers to obtain the UUID which identifies the underlying exported object. Signed-off-by: David Stevens Acked-by: Michael S. Tsirkin Link: http://patchwork.freedesktop.org/patch/msgid/20200818071343.3461203-2-stevensd@chromium.org Signed-off-by: Gerd Hoffmann --- include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 include/linux/virtio_dma_buf.h (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index a493eac08393..55ea329fe72a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -127,6 +127,7 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev) void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); +bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); diff --git a/include/linux/virtio_dma_buf.h b/include/linux/virtio_dma_buf.h new file mode 100644 index 000000000000..a2fdf217ac62 --- /dev/null +++ b/include/linux/virtio_dma_buf.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dma-bufs for virtio exported objects + * + * Copyright (C) 2020 Google, Inc. + */ + +#ifndef _LINUX_VIRTIO_DMA_BUF_H +#define _LINUX_VIRTIO_DMA_BUF_H + +#include +#include +#include + +/** + * struct virtio_dma_buf_ops - operations possible on exported object dma-buf + * @ops: the base dma_buf_ops. ops.attach MUST be virtio_dma_buf_attach. + * @device_attach: [optional] callback invoked by virtio_dma_buf_attach during + * all attach operations. + * @get_uid: [required] callback to get the uuid of the exported object. + */ +struct virtio_dma_buf_ops { + struct dma_buf_ops ops; + int (*device_attach)(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach); + int (*get_uuid)(struct dma_buf *dma_buf, uuid_t *uuid); +}; + +int virtio_dma_buf_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach); + +struct dma_buf *virtio_dma_buf_export + (const struct dma_buf_export_info *exp_info); +bool is_virtio_dma_buf(struct dma_buf *dma_buf); +int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf, uuid_t *uuid); + +#endif /* _LINUX_VIRTIO_DMA_BUF_H */ -- cgit v1.2.3 From d73568c4ccb01d01e20cd23fefbff8e4a05ddfac Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 18 Aug 2020 10:56:51 +0200 Subject: vt: make vc_data pointers const in selection.h There are many functions declared in selection.h which only read from struct vc_data passed as a parameter. Make all those uses const to hint the compiler a bit. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200818085706.12163-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 2 +- include/linux/consolemap.h | 3 ++- include/linux/selection.h | 14 ++++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 0670d3491e0e..4b1e26c4cb42 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -74,7 +74,7 @@ struct consw { enum vc_intensity intensity, bool blink, bool underline, bool reverse, bool italic); void (*con_invert_region)(struct vc_data *vc, u16 *p, int count); - u16 *(*con_screen_pos)(struct vc_data *vc, int offset); + u16 *(*con_screen_pos)(const struct vc_data *vc, int offset); unsigned long (*con_getxy)(struct vc_data *vc, unsigned long position, int *px, int *py); /* diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 254246673390..bcfce748c9d8 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -17,7 +17,8 @@ #ifdef CONFIG_CONSOLE_TRANSLATIONS struct vc_data; -extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode); +extern u16 inverse_translate(const struct vc_data *conp, int glyph, + int use_unicode); extern unsigned short *set_translate(int m, struct vc_data *vc); extern int conv_uni_to_pc(struct vc_data *conp, long ucs); extern u32 conv_8bit_to_uni(unsigned char c); diff --git a/include/linux/selection.h b/include/linux/selection.h index 5b890ef5b59f..34404a019ebf 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -33,21 +33,23 @@ extern unsigned char default_red[]; extern unsigned char default_grn[]; extern unsigned char default_blu[]; -extern unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed); -extern u16 screen_glyph(struct vc_data *vc, int offset); -extern u32 screen_glyph_unicode(struct vc_data *vc, int offset); +extern unsigned short *screen_pos(const struct vc_data *vc, int w_offset, + int viewed); +extern u16 screen_glyph(const struct vc_data *vc, int offset); +extern u32 screen_glyph_unicode(const struct vc_data *vc, int offset); extern void complement_pos(struct vc_data *vc, int offset); extern void invert_screen(struct vc_data *vc, int offset, int count, int shift); -extern void getconsxy(struct vc_data *vc, unsigned char *p); +extern void getconsxy(const struct vc_data *vc, unsigned char *p); extern void putconsxy(struct vc_data *vc, unsigned char *p); -extern u16 vcs_scr_readw(struct vc_data *vc, const u16 *org); +extern u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org); extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); extern void vcs_scr_updated(struct vc_data *vc); extern int vc_uniscr_check(struct vc_data *vc); -extern void vc_uniscr_copy_line(struct vc_data *vc, void *dest, int viewed, +extern void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, + int viewed, unsigned int row, unsigned int col, unsigned int nr); -- cgit v1.2.3 From a5c6bd806dd626103db38dee77796fe758c8df94 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 18 Aug 2020 10:56:52 +0200 Subject: vt: declare xy for get/putconsxy properly That is: 1) call the parameter 'xy' to denote what it really is, not generic 'p' 2) tell the compiler and users that we expect an array: * with at least 2 chars (static 2) * which we don't modify in putconsxy (const) Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200818085706.12163-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/selection.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/selection.h b/include/linux/selection.h index 34404a019ebf..15e36e7ef869 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -40,8 +40,8 @@ extern u32 screen_glyph_unicode(const struct vc_data *vc, int offset); extern void complement_pos(struct vc_data *vc, int offset); extern void invert_screen(struct vc_data *vc, int offset, int count, int shift); -extern void getconsxy(const struct vc_data *vc, unsigned char *p); -extern void putconsxy(struct vc_data *vc, unsigned char *p); +extern void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); +extern void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); extern u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org); extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); -- cgit v1.2.3 From b8209f694f7f4256181deea92d30eedb908d6788 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 18 Aug 2020 10:56:53 +0200 Subject: vc: propagate "viewed as bool" from screenpos up viewed is used as a flag, i.e. bool. So treat is as such in most of the places. vcs_vc is handled in the next patch. Note: the last parameter of invert_screen was misnamed in the declaration since 1.1.92. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200818085706.12163-3-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/selection.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/selection.h b/include/linux/selection.h index 15e36e7ef869..170ef28ff26b 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -34,11 +34,11 @@ extern unsigned char default_grn[]; extern unsigned char default_blu[]; extern unsigned short *screen_pos(const struct vc_data *vc, int w_offset, - int viewed); + bool viewed); extern u16 screen_glyph(const struct vc_data *vc, int offset); extern u32 screen_glyph_unicode(const struct vc_data *vc, int offset); extern void complement_pos(struct vc_data *vc, int offset); -extern void invert_screen(struct vc_data *vc, int offset, int count, int shift); +extern void invert_screen(struct vc_data *vc, int offset, int count, bool viewed); extern void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); extern void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); @@ -49,7 +49,7 @@ extern void vcs_scr_updated(struct vc_data *vc); extern int vc_uniscr_check(struct vc_data *vc); extern void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, - int viewed, + bool viewed, unsigned int row, unsigned int col, unsigned int nr); -- cgit v1.2.3 From 4524ac56cdcabf77b734ec8021089cba59cac1ac Mon Sep 17 00:00:00 2001 From: Mayulong Date: Mon, 17 Aug 2020 09:10:30 +0200 Subject: staging: mfd: add a PMIC driver for HiSilicon 6421 SPMI version Add the PMIC SPMI driver for the HiSilicon 6421v600. [mchehab+huawei@kernel.org: keep just the MFD driver on this patch, and renamed filenames to better match other upstream drivers] The compete patch is at: https://github.com/96boards-hikey/linux/commit/08464419fba2 Signed-off-by: Mayulong Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/4ffb2694244baa47387e39e2c5d71243242c1fc1.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 165 +++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 include/linux/mfd/hi6421-spmi-pmic.h (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h new file mode 100644 index 000000000000..939b36f617c1 --- /dev/null +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -0,0 +1,165 @@ +/* + * Header file for device driver Hi6421 PMIC + * + * Copyright (c) 2013 Linaro Ltd. + * Copyright (C) 2011 Hisilicon. + * + * Guodong Xu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef __HISI_PMIC_H +#define __HISI_PMIC_H + +#include + +#define HISI_REGS_ENA_PROTECT_TIME (0) /* in microseconds */ +#define HISI_ECO_MODE_ENABLE (1) +#define HISI_ECO_MODE_DISABLE (0) + +typedef int (*pmic_ocp_callback)(char *); +extern int hisi_pmic_special_ocp_register(char *power_name, pmic_ocp_callback handler); + +struct irq_mask_info { + int start_addr; + int array; +}; + +struct irq_info { + int start_addr; + int array; +}; + +struct bit_info { + int addr; + int bit; +}; + +struct write_lock { + int addr; + int val; +}; + +struct hisi_pmic { + struct resource *res; + struct device *dev; + void __iomem *regs; + spinlock_t lock; + struct irq_domain *domain; + int irq; + int gpio; + unsigned int *irqs; + int irqnum; + int irqarray; + struct irq_mask_info irq_mask_addr; + struct irq_info irq_addr; + int irqnum1; + int irqarray1; + struct irq_mask_info irq_mask_addr1; + struct irq_info irq_addr1; + struct write_lock normal_lock; + struct write_lock debug_lock; +}; + +/* 0:disable; 1:enable */ +unsigned int get_uv_mntn_status(void); +void clear_uv_mntn_resered_reg_bit(void); +void set_uv_mntn_resered_reg_bit(void); + +#if defined(CONFIG_HISI_PMIC) || defined(CONFIG_HISI_PMIC_PMU_SPMI) +/* Register Access Helpers */ +u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg); +void hisi_pmic_write(struct hisi_pmic *pmic, int reg, u32 val); +void hisi_pmic_rmw(struct hisi_pmic *pmic, int reg, u32 mask, u32 bits); +unsigned int hisi_pmic_reg_read(int addr); +void hisi_pmic_reg_write(int addr, int val); +void hisi_pmic_reg_write_lock(int addr, int val); +int hisi_pmic_array_read(int addr, char *buff, unsigned int len); +int hisi_pmic_array_write(int addr, char *buff, unsigned int len); +extern int hisi_get_pmic_irq_byname(unsigned int pmic_irq_list); +extern int hisi_pmic_get_vbus_status(void); +#if defined(CONFIG_HISI_DIEID) +u32 hisi_pmic_read_sub_pmu(u8 sid ,int reg); +void hisi_pmic_write_sub_pmu(u8 sid ,int reg, u32 val); +#endif +#else +static inline u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg) { return 0; } +static inline void hisi_pmic_write(struct hisi_pmic *pmic, int reg, u32 val) {} +static inline void hisi_pmic_rmw(struct hisi_pmic *pmic, int reg, u32 mask, u32 bits) {} +static inline unsigned int hisi_pmic_reg_read(int addr) { return 0; } +static inline void hisi_pmic_reg_write(int addr, int val) {} +static inline void hisi_pmic_reg_write_lock(int addr, int val) {} +static inline int hisi_pmic_array_read(int addr, char *buff, unsigned int len) { return 0; } +static inline int hisi_pmic_array_write(int addr, char *buff, unsigned int len) { return 0; } +static inline int hisi_get_pmic_irq_byname(unsigned int pmic_irq_list) { return -1; } +static inline int hisi_pmic_get_vbus_status(void) { return 1; } +static inline u32 hisi_pmic_read_sub_pmu(u8 sid ,int reg) { return 0; } +static inline void hisi_pmic_write_sub_pmu(u8 sid ,int reg, u32 val) {} +#endif + +#ifdef CONFIG_HISI_HI6421V500_PMU +enum pmic_irq_list { + POR_D45MR = 0, + VBUS_CONNECT, + VBUS_DISCONNECT, + ALARMON_R, + HOLD_6S, + HOLD_1S, + POWERKEY_UP, + POWERKEY_DOWN, + OCP_SCP_R, + COUL_R, + VSYS_OV, + VSYS_UV, + VSYS_PWROFF_ABS, + VSYS_PWROFF_DEB, + THSD_OTMP140, + THSD_OTMP125, + HRESETN, + SIM0_HPD_R = 24, + SIM0_HPD_F, + SIM0_HPD_H, + SIM0_HPD_L, + SIM1_HPD_R, + SIM1_HPD_F, + SIM1_HPD_H, + SIM1_HPD_L, + PMIC_IRQ_LIST_MAX, +}; +#else +enum pmic_irq_list { + OTMP = 0, + VBUS_CONNECT, + VBUS_DISCONNECT, + ALARMON_R, + HOLD_6S, + HOLD_1S, + POWERKEY_UP, + POWERKEY_DOWN, + OCP_SCP_R, + COUL_R, + SIM0_HPD_R, + SIM0_HPD_F, + SIM1_HPD_R, + SIM1_HPD_F, + PMIC_IRQ_LIST_MAX, +}; +#endif + +#ifdef CONFIG_HISI_SR_DEBUG +extern void get_ip_regulator_state(void); +#endif +#endif /* __HISI_PMIC_H */ + -- cgit v1.2.3 From 489b1a36b111b6a3a4ac1acf29de5cf154810887 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Aug 2020 09:10:31 +0200 Subject: staging: mfd: hi6421-spmi-pmic: get rid of unused code There are some checks there which could make sense for downstream builds, but doesn't make much sense for upstream ones. They came from the official Hikey970 tree from Linaro, but even there, the commented-out code is not set via other Kconfig vars. So, let's just get rid of that. If needed later, this patch can be (partially?) reversed. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/ecbef801f6c32ba0850ad9e5c534a4304807df3b.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 42 ------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index 939b36f617c1..5be9b4d3f207 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -78,7 +78,6 @@ unsigned int get_uv_mntn_status(void); void clear_uv_mntn_resered_reg_bit(void); void set_uv_mntn_resered_reg_bit(void); -#if defined(CONFIG_HISI_PMIC) || defined(CONFIG_HISI_PMIC_PMU_SPMI) /* Register Access Helpers */ u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg); void hisi_pmic_write(struct hisi_pmic *pmic, int reg, u32 val); @@ -90,11 +89,6 @@ int hisi_pmic_array_read(int addr, char *buff, unsigned int len); int hisi_pmic_array_write(int addr, char *buff, unsigned int len); extern int hisi_get_pmic_irq_byname(unsigned int pmic_irq_list); extern int hisi_pmic_get_vbus_status(void); -#if defined(CONFIG_HISI_DIEID) -u32 hisi_pmic_read_sub_pmu(u8 sid ,int reg); -void hisi_pmic_write_sub_pmu(u8 sid ,int reg, u32 val); -#endif -#else static inline u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg) { return 0; } static inline void hisi_pmic_write(struct hisi_pmic *pmic, int reg, u32 val) {} static inline void hisi_pmic_rmw(struct hisi_pmic *pmic, int reg, u32 mask, u32 bits) {} @@ -107,38 +101,7 @@ static inline int hisi_get_pmic_irq_byname(unsigned int pmic_irq_list) { return static inline int hisi_pmic_get_vbus_status(void) { return 1; } static inline u32 hisi_pmic_read_sub_pmu(u8 sid ,int reg) { return 0; } static inline void hisi_pmic_write_sub_pmu(u8 sid ,int reg, u32 val) {} -#endif -#ifdef CONFIG_HISI_HI6421V500_PMU -enum pmic_irq_list { - POR_D45MR = 0, - VBUS_CONNECT, - VBUS_DISCONNECT, - ALARMON_R, - HOLD_6S, - HOLD_1S, - POWERKEY_UP, - POWERKEY_DOWN, - OCP_SCP_R, - COUL_R, - VSYS_OV, - VSYS_UV, - VSYS_PWROFF_ABS, - VSYS_PWROFF_DEB, - THSD_OTMP140, - THSD_OTMP125, - HRESETN, - SIM0_HPD_R = 24, - SIM0_HPD_F, - SIM0_HPD_H, - SIM0_HPD_L, - SIM1_HPD_R, - SIM1_HPD_F, - SIM1_HPD_H, - SIM1_HPD_L, - PMIC_IRQ_LIST_MAX, -}; -#else enum pmic_irq_list { OTMP = 0, VBUS_CONNECT, @@ -156,10 +119,5 @@ enum pmic_irq_list { SIM1_HPD_F, PMIC_IRQ_LIST_MAX, }; -#endif - -#ifdef CONFIG_HISI_SR_DEBUG -extern void get_ip_regulator_state(void); -#endif #endif /* __HISI_PMIC_H */ -- cgit v1.2.3 From 4b5e9b39e7dd9e6b980ad588f1f7a36fe7cda044 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Aug 2020 09:10:33 +0200 Subject: staging: mfd: hi6421-spmi-pmic: get rid of the static vars There are several static vars inside this driver. Get rid of them. While here, add a SPDX header file. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/19c497fc2bb1d3a95863d92cac89869d5abe3f2e.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index 5be9b4d3f207..e0a8b50f95fc 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Header file for device driver Hi6421 PMIC * @@ -5,19 +6,6 @@ * Copyright (C) 2011 Hisilicon. * * Guodong Xu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef __HISI_PMIC_H @@ -25,12 +13,12 @@ #include -#define HISI_REGS_ENA_PROTECT_TIME (0) /* in microseconds */ +#define HISI_REGS_ENA_PROTECT_TIME (0) /* in microseconds */ #define HISI_ECO_MODE_ENABLE (1) #define HISI_ECO_MODE_DISABLE (0) typedef int (*pmic_ocp_callback)(char *); -extern int hisi_pmic_special_ocp_register(char *power_name, pmic_ocp_callback handler); +int hisi_pmic_special_ocp_register(char *power_name, pmic_ocp_callback handler); struct irq_mask_info { int start_addr; @@ -71,6 +59,8 @@ struct hisi_pmic { struct irq_info irq_addr1; struct write_lock normal_lock; struct write_lock debug_lock; + + unsigned int g_extinterrupt_flag; }; /* 0:disable; 1:enable */ -- cgit v1.2.3 From 4860b39f545fd96805c6400bd185de4fb383a2c7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Aug 2020 09:10:34 +0200 Subject: staging: mfd: hi6421-spmi-pmic: cleanup hi6421-spmi-pmic.h header There are several external vars that are defined there, which are not needed anymore. Get rid of them. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3dbc3f3876275404153da52b84e5dcef09faf644.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index e0a8b50f95fc..1f986dd5f31c 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -63,34 +63,9 @@ struct hisi_pmic { unsigned int g_extinterrupt_flag; }; -/* 0:disable; 1:enable */ -unsigned int get_uv_mntn_status(void); -void clear_uv_mntn_resered_reg_bit(void); -void set_uv_mntn_resered_reg_bit(void); - -/* Register Access Helpers */ u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg); void hisi_pmic_write(struct hisi_pmic *pmic, int reg, u32 val); void hisi_pmic_rmw(struct hisi_pmic *pmic, int reg, u32 mask, u32 bits); -unsigned int hisi_pmic_reg_read(int addr); -void hisi_pmic_reg_write(int addr, int val); -void hisi_pmic_reg_write_lock(int addr, int val); -int hisi_pmic_array_read(int addr, char *buff, unsigned int len); -int hisi_pmic_array_write(int addr, char *buff, unsigned int len); -extern int hisi_get_pmic_irq_byname(unsigned int pmic_irq_list); -extern int hisi_pmic_get_vbus_status(void); -static inline u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg) { return 0; } -static inline void hisi_pmic_write(struct hisi_pmic *pmic, int reg, u32 val) {} -static inline void hisi_pmic_rmw(struct hisi_pmic *pmic, int reg, u32 mask, u32 bits) {} -static inline unsigned int hisi_pmic_reg_read(int addr) { return 0; } -static inline void hisi_pmic_reg_write(int addr, int val) {} -static inline void hisi_pmic_reg_write_lock(int addr, int val) {} -static inline int hisi_pmic_array_read(int addr, char *buff, unsigned int len) { return 0; } -static inline int hisi_pmic_array_write(int addr, char *buff, unsigned int len) { return 0; } -static inline int hisi_get_pmic_irq_byname(unsigned int pmic_irq_list) { return -1; } -static inline int hisi_pmic_get_vbus_status(void) { return 1; } -static inline u32 hisi_pmic_read_sub_pmu(u8 sid ,int reg) { return 0; } -static inline void hisi_pmic_write_sub_pmu(u8 sid ,int reg, u32 val) {} enum pmic_irq_list { OTMP = 0, @@ -110,4 +85,3 @@ enum pmic_irq_list { PMIC_IRQ_LIST_MAX, }; #endif /* __HISI_PMIC_H */ - -- cgit v1.2.3 From bd07d62a47290ca3ceee58f373fa05464edc6eb5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Aug 2020 09:10:36 +0200 Subject: staging: mfd: hi6421-spmi-pmic: get rid of unused OF properties There are several OF properties that aren't used by Hikey 970, and some are not even used inside the driver. So, drop them, as as this makes easier to document what's actually used. If latter needed, those could be re-added later. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/332f96c178b81bf1e9908a1da2127f043909ae0c.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index 1f986dd5f31c..41b61de48259 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -48,19 +48,11 @@ struct hisi_pmic { struct irq_domain *domain; int irq; int gpio; - unsigned int *irqs; + unsigned int *irqs; int irqnum; int irqarray; - struct irq_mask_info irq_mask_addr; - struct irq_info irq_addr; - int irqnum1; - int irqarray1; - struct irq_mask_info irq_mask_addr1; - struct irq_info irq_addr1; - struct write_lock normal_lock; - struct write_lock debug_lock; - - unsigned int g_extinterrupt_flag; + struct irq_mask_info irq_mask_addr; + struct irq_info irq_addr; }; u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg); -- cgit v1.2.3 From 1eb2784a90925d48500f6baac1fa1870085c4121 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Aug 2020 09:10:38 +0200 Subject: staging: mfd: hi6421-spmi-pmic: change namespace on its functions Rename the functions used internally inside the driver in order for them to follow the driver's name. While here, get rid of some unused definitions at the header file. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/bfa8bf33f71612b1511d73269ca242d0d4e70940.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 51 ++++++++++++++---------------------- 1 file changed, 19 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index 41b61de48259..d12ad7484018 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -17,49 +17,36 @@ #define HISI_ECO_MODE_ENABLE (1) #define HISI_ECO_MODE_DISABLE (0) -typedef int (*pmic_ocp_callback)(char *); -int hisi_pmic_special_ocp_register(char *power_name, pmic_ocp_callback handler); - -struct irq_mask_info { +struct hi6421_spmi_irq_mask_info { int start_addr; int array; }; -struct irq_info { +struct hi6421_spmi_irq_info { int start_addr; int array; }; -struct bit_info { - int addr; - int bit; -}; - -struct write_lock { - int addr; - int val; -}; - -struct hisi_pmic { - struct resource *res; - struct device *dev; - void __iomem *regs; - spinlock_t lock; - struct irq_domain *domain; - int irq; - int gpio; - unsigned int *irqs; - int irqnum; - int irqarray; - struct irq_mask_info irq_mask_addr; - struct irq_info irq_addr; +struct hi6421_spmi_pmic { + struct resource *res; + struct device *dev; + void __iomem *regs; + spinlock_t lock; + struct irq_domain *domain; + int irq; + int gpio; + unsigned int *irqs; + int irqnum; + int irqarray; + struct hi6421_spmi_irq_mask_info irq_mask_addr; + struct hi6421_spmi_irq_info irq_addr; }; -u32 hisi_pmic_read(struct hisi_pmic *pmic, int reg); -void hisi_pmic_write(struct hisi_pmic *pmic, int reg, u32 val); -void hisi_pmic_rmw(struct hisi_pmic *pmic, int reg, u32 mask, u32 bits); +u32 hi6421_spmi_pmic_read(struct hi6421_spmi_pmic *pmic, int reg); +void hi6421_spmi_pmic_write(struct hi6421_spmi_pmic *pmic, int reg, u32 val); +void hi6421_spmi_pmic_rmw(struct hi6421_spmi_pmic *pmic, int reg, u32 mask, u32 bits); -enum pmic_irq_list { +enum hi6421_spmi_pmic_irq_list { OTMP = 0, VBUS_CONNECT, VBUS_DISCONNECT, -- cgit v1.2.3 From 4d70881afdeb261ee318447ed0232b96946ecabc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Aug 2020 09:10:39 +0200 Subject: staging: mfd: hi6421-spmi-pmic: fix some coding style issues Checkpatch complains about some minor issues inside this driver that were not addressed by the previous patch. Address them. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/84b53d20632c84cc60b8dadfe937f3c54b355cef.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index d12ad7484018..403fd8bb45fa 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -38,7 +38,7 @@ struct hi6421_spmi_pmic { unsigned int *irqs; int irqnum; int irqarray; - struct hi6421_spmi_irq_mask_info irq_mask_addr; + struct hi6421_spmi_irq_mask_info irq_mask_addr; struct hi6421_spmi_irq_info irq_addr; }; -- cgit v1.2.3 From 6b946699252c68b0c792896eded217269c9aa286 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Aug 2020 09:10:41 +0200 Subject: staging: mfd: hi6421-spmi-pmic: cleanup the code There are several small cleanups that can be done in order to make the code more prepared to be upstreamed. Suggested-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/823792ba2f69e613629ab52a33e5728d54e2288b.1597647359.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index 403fd8bb45fa..ff3adfa7b3ec 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -36,15 +36,17 @@ struct hi6421_spmi_pmic { int irq; int gpio; unsigned int *irqs; + int irqnum; int irqarray; - struct hi6421_spmi_irq_mask_info irq_mask_addr; - struct hi6421_spmi_irq_info irq_addr; + int irq_mask_addr; + int irq_addr; }; -u32 hi6421_spmi_pmic_read(struct hi6421_spmi_pmic *pmic, int reg); -void hi6421_spmi_pmic_write(struct hi6421_spmi_pmic *pmic, int reg, u32 val); -void hi6421_spmi_pmic_rmw(struct hi6421_spmi_pmic *pmic, int reg, u32 mask, u32 bits); +int hi6421_spmi_pmic_read(struct hi6421_spmi_pmic *pmic, int reg); +int hi6421_spmi_pmic_write(struct hi6421_spmi_pmic *pmic, int reg, u32 val); +int hi6421_spmi_pmic_rmw(struct hi6421_spmi_pmic *pmic, int reg, + u32 mask, u32 bits); enum hi6421_spmi_pmic_irq_list { OTMP = 0, -- cgit v1.2.3 From 9f0c4fa111dc909ca545c45ea20ec84da555ce16 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 23 Jul 2020 10:11:10 -0700 Subject: perf/core: Add a new PERF_EV_CAP_SIBLING event capability Current perf assumes that events in a group are independent. Close an event doesn't impact the value of the other events in the same group. If the closed event is a member, after the event closure, other events are still running like a group. If the closed event is a leader, other events are running as singleton events. Add PERF_EV_CAP_SIBLING to allow events to indicate they require being part of a group, and when the leader dies they cannot exist independently. Suggested-by: Peter Zijlstra Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200723171117.9918-8-kan.liang@linux.intel.com --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 04a49ccc7beb..6048650f8c1d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -576,9 +576,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, * PERF_EV_CAP_SOFTWARE: Is a software event. * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read * from any CPU in the package where it is active. + * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and + * cannot be a group leader. If an event with this flag is detached from the + * group it is scheduled out and moved into an unrecoverable ERROR state. */ #define PERF_EV_CAP_SOFTWARE BIT(0) #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) +#define PERF_EV_CAP_SIBLING BIT(2) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) -- cgit v1.2.3 From 2cb5383b30d47c446ec7d884cd80f93ffcc31817 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 23 Jul 2020 10:11:14 -0700 Subject: perf/x86/intel: Support per-thread RDPMC TopDown metrics Starts from Ice Lake, the TopDown metrics are directly available as fixed counters and do not require generic counters. Also, the TopDown metrics can be collected per thread. Extend the RDPMC usage to support per-thread TopDown metrics. The RDPMC index of the PERF_METRICS will be output if RDPMC users ask for the RDPMC index of the metrics events. To support per thread RDPMC TopDown, the metrics and slots counters have to be saved/restored during the context switching. The last_period and period_left are not used in the counting mode. Use the fields for saved_metric and saved_slots. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200723171117.9918-12-kan.liang@linux.intel.com --- include/linux/perf_event.h | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6048650f8c1d..46a3974eb4fe 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -212,17 +212,26 @@ struct hw_perf_event { */ u64 sample_period; - /* - * The period we started this sample with. - */ - u64 last_period; + union { + struct { /* Sampling */ + /* + * The period we started this sample with. + */ + u64 last_period; - /* - * However much is left of the current period; note that this is - * a full 64bit value and allows for generation of periods longer - * than hardware might allow. - */ - local64_t period_left; + /* + * However much is left of the current period; + * note that this is a full 64bit value and + * allows for generation of periods longer + * than hardware might allow. + */ + local64_t period_left; + }; + struct { /* Topdown events counting for context switch */ + u64 saved_metric; + u64 saved_slots; + }; + }; /* * State for throttling the event, see __perf_event_overflow() and -- cgit v1.2.3 From b240d0143bfbc96f610405f978e4753fd663cbfc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 18 Aug 2020 16:58:54 +0200 Subject: staging: mfd: hi6421-spmi-pmic: get rid of interrupt properties Both irqnum and irqarray properties reflect the same thing: the number of bits and bytes for interrupts at this chipset. E. g.: irqnum = 8 x irqarray This can be seen by the way pending interrupts are handled: /* During probe time */ pmic->irqs = devm_kzalloc(dev, pmic->irqnum * sizeof(int), GFP_KERNEL); /* While handling IRQs */ for (i = 0; i < pmic->irqarray; i++) { pending = hi6421_spmi_pmic_read(pmic, (i + pmic->irq_addr)); pending &= 0xff; for_each_set_bit(offset, &pending, 8) generic_handle_irq(pmic->irqs[offset + i * 8]); } Going further, there are some logic at the driver which assumes that irqarray is 2: /* solve powerkey order */ if ((i == HISI_IRQ_KEY_NUM) && ((pending & HISI_IRQ_KEY_VALUE) == HISI_IRQ_KEY_VALUE)) { generic_handle_irq(pmic->irqs[HISI_IRQ_KEY_DOWN]); generic_handle_irq(pmic->irqs[HISI_IRQ_KEY_UP]); pending &= (~HISI_IRQ_KEY_VALUE); } As HISI_IRQ_KEY_DOWN and HISI_IRQ_KEY_UP are fixed values and don't depend on irqnum/irqarray. The IRQ addr and mask addr seem to be also fixed, based on some comments at the OF parsing code. So, get rid of them too, removing the of parsing function completely. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/e231244e42cb5b56240705cac2f987e11a078038.1597762400.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/hi6421-spmi-pmic.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h index ff3adfa7b3ec..2c8896fd852e 100644 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ b/include/linux/mfd/hi6421-spmi-pmic.h @@ -17,16 +17,6 @@ #define HISI_ECO_MODE_ENABLE (1) #define HISI_ECO_MODE_DISABLE (0) -struct hi6421_spmi_irq_mask_info { - int start_addr; - int array; -}; - -struct hi6421_spmi_irq_info { - int start_addr; - int array; -}; - struct hi6421_spmi_pmic { struct resource *res; struct device *dev; @@ -36,11 +26,6 @@ struct hi6421_spmi_pmic { int irq; int gpio; unsigned int *irqs; - - int irqnum; - int irqarray; - int irq_mask_addr; - int irq_addr; }; int hi6421_spmi_pmic_read(struct hi6421_spmi_pmic *pmic, int reg); -- cgit v1.2.3 From a21a4391f20c0ab45db452e22bc3e8afe8b36e46 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:24 +0000 Subject: x86/resctrl: Include pid.h We are about to disturb the header soup. This header uses struct pid and struct pid_namespace. Include their header. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-6-james.morse@arm.com --- include/linux/resctrl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index daf5cf64c6a6..9b05af9b3e28 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -2,6 +2,8 @@ #ifndef _RESCTRL_H #define _RESCTRL_H +#include + #ifdef CONFIG_PROC_CPU_RESCTRL int proc_resctrl_show(struct seq_file *m, -- cgit v1.2.3 From cfe7ddcbd72dc67ce5749cc6f451a2b0c6aec5b5 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:47 +0100 Subject: ARM, sched/topology: Remove SD_SHARE_POWERDOMAIN This flag was introduced in 2014 by commit: d77b3ed5c9f8 ("sched: Add a new SD_SHARE_POWERDOMAIN for sched_domain") but AFAIA it was never leveraged by the scheduler. The closest thing I can think of is EAS caring about frequency domains, and it does that by leveraging performance domains. Remove the flag. No change in functionality is expected. Suggested-by: Morten Rasmussen Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Reviewed-by: Dietmar Eggemann Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-2-valentin.schneider@arm.com --- include/linux/sched/topology.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 820511289857..6ec7d7c1d1e3 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -18,13 +18,12 @@ #define SD_WAKE_AFFINE 0x0010 /* Wake task to waking CPU */ #define SD_ASYM_CPUCAPACITY 0x0020 /* Domain members have different CPU capacities */ #define SD_SHARE_CPUCAPACITY 0x0040 /* Domain members share CPU capacity */ -#define SD_SHARE_POWERDOMAIN 0x0080 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0100 /* Domain members share CPU pkg resources */ -#define SD_SERIALIZE 0x0200 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0400 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x0800 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x1000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x2000 /* cross-node balancing */ +#define SD_SHARE_PKG_RESOURCES 0x0080 /* Domain members share CPU pkg resources */ +#define SD_SERIALIZE 0x0100 /* Only a single load balancing instance */ +#define SD_ASYM_PACKING 0x0200 /* Place busy groups earlier in the domain */ +#define SD_PREFER_SIBLING 0x0400 /* Prefer to place tasks in a sibling domain */ +#define SD_OVERLAP 0x0800 /* sched_domains of this level overlap */ +#define SD_NUMA 0x1000 /* cross-node balancing */ #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) -- cgit v1.2.3 From d54a9658a75633b839af7a2c6c758807678b8064 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:49 +0100 Subject: sched/topology: Split out SD_* flags declaration to its own file To associate the SD flags with some metadata, we need some more structure in the way they are declared. Rather than shove that in a free-standing macro list, move the declaration in a separate file that can be re-imported with different SD_FLAG definitions. This is inspired by what is done with the syscall table (see uapi/asm/unistd.h and sys_call_table). The value assigned to a given SD flag now depends on the order it appears in sd_flags.h. No change in functionality. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Reviewed-by: Dietmar Eggemann Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-4-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 35 +++++++++++++++++++++++++++++++++++ include/linux/sched/topology.h | 26 +++++++++++++------------- 2 files changed, 48 insertions(+), 13 deletions(-) create mode 100644 include/linux/sched/sd_flags.h (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h new file mode 100644 index 000000000000..373dc45c024e --- /dev/null +++ b/include/linux/sched/sd_flags.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * sched-domains (multiprocessor balancing) flag declarations. + */ + +#ifndef SD_FLAG +# error "Incorrect import of SD flags definitions" +#endif + +/* Balance when about to become idle */ +SD_FLAG(SD_BALANCE_NEWIDLE) +/* Balance on exec */ +SD_FLAG(SD_BALANCE_EXEC) +/* Balance on fork, clone */ +SD_FLAG(SD_BALANCE_FORK) +/* Balance on wakeup */ +SD_FLAG(SD_BALANCE_WAKE) +/* Wake task to waking CPU */ +SD_FLAG(SD_WAKE_AFFINE) +/* Domain members have different CPU capacities */ +SD_FLAG(SD_ASYM_CPUCAPACITY) +/* Domain members share CPU capacity */ +SD_FLAG(SD_SHARE_CPUCAPACITY) +/* Domain members share CPU pkg resources */ +SD_FLAG(SD_SHARE_PKG_RESOURCES) +/* Only a single load balancing instance */ +SD_FLAG(SD_SERIALIZE) +/* Place busy groups earlier in the domain */ +SD_FLAG(SD_ASYM_PACKING) +/* Prefer to place tasks in a sibling domain */ +SD_FLAG(SD_PREFER_SIBLING) +/* sched_domains of this level overlap */ +SD_FLAG(SD_OVERLAP) +/* cross-node balancing */ +SD_FLAG(SD_NUMA) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 6ec7d7c1d1e3..3e41c0401b5f 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -11,19 +11,19 @@ */ #ifdef CONFIG_SMP -#define SD_BALANCE_NEWIDLE 0x0001 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0002 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0004 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0008 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0010 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0020 /* Domain members have different CPU capacities */ -#define SD_SHARE_CPUCAPACITY 0x0040 /* Domain members share CPU capacity */ -#define SD_SHARE_PKG_RESOURCES 0x0080 /* Domain members share CPU pkg resources */ -#define SD_SERIALIZE 0x0100 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0200 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x0400 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x0800 /* sched_domains of this level overlap */ -#define SD_NUMA 0x1000 /* cross-node balancing */ +/* Generate SD flag indexes */ +#define SD_FLAG(name) __##name, +enum { + #include + __SD_FLAG_CNT, +}; +#undef SD_FLAG +/* Generate SD flag bits */ +#define SD_FLAG(name) name = 1 << __##name, +enum { + #include +}; +#undef SD_FLAG #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) -- cgit v1.2.3 From b6e862f386722e0de6c37f85f1cf438a0efa7f93 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:50 +0100 Subject: sched/topology: Define and assign sched_domain flag metadata There are some expectations regarding how sched domain flags should be laid out, but none of them are checked or asserted in sched_domain_debug_one(). After staring at said flags for a while, I've come to realize there's two repeating patterns: - Shared with children: those flags are set from the base CPU domain upwards. Any domain that has it set will have it set in its children. It hints at "some property holds true / some behaviour is enabled until this level". - Shared with parents: those flags are set from the topmost domain downwards. Any domain that has it set will have it set in its parents. It hints at "some property isn't visible / some behaviour is disabled until this level". There are two outliers that (currently) do not map to either of these: o SD_PREFER_SIBLING, which is cleared below levels with SD_ASYM_CPUCAPACITY. The change was introduced by commit: 9c63e84db29b ("sched/core: Disable SD_PREFER_SIBLING on asymmetric CPU capacity domains") as it could break misfit migration on some systems. In light of this, we might want to change it back to make it fit one of the two categories and fix the issue another way. o SD_ASYM_CPUCAPACITY, which gets set on a single level and isn't propagated up nor down. From a topology description point of view, it really wants to be SDF_SHARED_PARENT; this will be rectified in a later patch. Tweak the sched_domain flag declaration to assign each flag an expected layout, and include the rationale for each flag "meta type" assignment as a comment. Consolidate the flag metadata into an array; the index of a flag's metadata can easily be found with log2(flag), IOW __ffs(flag). Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Reviewed-by: Dietmar Eggemann Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-5-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 147 +++++++++++++++++++++++++++++++++-------- include/linux/sched/topology.h | 15 ++++- 2 files changed, 134 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 373dc45c024e..c24a45b05fbb 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -7,29 +7,124 @@ # error "Incorrect import of SD flags definitions" #endif -/* Balance when about to become idle */ -SD_FLAG(SD_BALANCE_NEWIDLE) -/* Balance on exec */ -SD_FLAG(SD_BALANCE_EXEC) -/* Balance on fork, clone */ -SD_FLAG(SD_BALANCE_FORK) -/* Balance on wakeup */ -SD_FLAG(SD_BALANCE_WAKE) -/* Wake task to waking CPU */ -SD_FLAG(SD_WAKE_AFFINE) -/* Domain members have different CPU capacities */ -SD_FLAG(SD_ASYM_CPUCAPACITY) -/* Domain members share CPU capacity */ -SD_FLAG(SD_SHARE_CPUCAPACITY) -/* Domain members share CPU pkg resources */ -SD_FLAG(SD_SHARE_PKG_RESOURCES) -/* Only a single load balancing instance */ -SD_FLAG(SD_SERIALIZE) -/* Place busy groups earlier in the domain */ -SD_FLAG(SD_ASYM_PACKING) -/* Prefer to place tasks in a sibling domain */ -SD_FLAG(SD_PREFER_SIBLING) -/* sched_domains of this level overlap */ -SD_FLAG(SD_OVERLAP) -/* cross-node balancing */ -SD_FLAG(SD_NUMA) +/* + * Expected flag uses + * + * SHARED_CHILD: These flags are meant to be set from the base domain upwards. + * If a domain has this flag set, all of its children should have it set. This + * is usually because the flag describes some shared resource (all CPUs in that + * domain share the same resource), or because they are tied to a scheduling + * behaviour that we want to disable at some point in the hierarchy for + * scalability reasons. + * + * In those cases it doesn't make sense to have the flag set for a domain but + * not have it in (some of) its children: sched domains ALWAYS span their child + * domains, so operations done with parent domains will cover CPUs in the lower + * child domains. + * + * + * SHARED_PARENT: These flags are meant to be set from the highest domain + * downwards. If a domain has this flag set, all of its parents should have it + * set. This is usually for topology properties that start to appear above a + * certain level (e.g. domain starts spanning CPUs outside of the base CPU's + * socket). + */ +#define SDF_SHARED_CHILD 0x1 +#define SDF_SHARED_PARENT 0x2 + +/* + * Balance when about to become idle + * + * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level. + */ +SD_FLAG(SD_BALANCE_NEWIDLE, SDF_SHARED_CHILD) + +/* + * Balance on exec + * + * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + */ +SD_FLAG(SD_BALANCE_EXEC, SDF_SHARED_CHILD) + +/* + * Balance on fork, clone + * + * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + */ +SD_FLAG(SD_BALANCE_FORK, SDF_SHARED_CHILD) + +/* + * Balance on wakeup + * + * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level. + */ +SD_FLAG(SD_BALANCE_WAKE, SDF_SHARED_CHILD) + +/* + * Consider waking task on waking CPU. + * + * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + */ +SD_FLAG(SD_WAKE_AFFINE, SDF_SHARED_CHILD) + +/* + * Domain members have different CPU capacities + */ +SD_FLAG(SD_ASYM_CPUCAPACITY, 0) + +/* + * Domain members share CPU capacity (i.e. SMT) + * + * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share + * CPU capacity. + */ +SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD) + +/* + * Domain members share CPU package resources (i.e. caches) + * + * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share + * the same cache(s). + */ +SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD) + +/* + * Only a single load balancing instance + * + * SHARED_PARENT: Set for all NUMA levels above NODE. Could be set from a + * different level upwards, but it doesn't change that if a domain has this flag + * set, then all of its parents need to have it too (otherwise the serialization + * doesn't make sense). + */ +SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT) + +/* + * Place busy tasks earlier in the domain + * + * SHARED_CHILD: Usually set on the SMT level. Technically could be set further + * up, but currently assumed to be set from the base domain upwards (see + * update_top_cache_domain()). + */ +SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD) + +/* + * Prefer to place tasks in a sibling domain + * + * Set up until domains start spanning NUMA nodes. Close to being a SHARED_CHILD + * flag, but cleared below domains with SD_ASYM_CPUCAPACITY. + */ +SD_FLAG(SD_PREFER_SIBLING, 0) + +/* + * sched_groups of this level overlap + * + * SHARED_PARENT: Set for all NUMA levels above NODE. + */ +SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT) + +/* + * Cross-node balancing + * + * SHARED_PARENT: Set for all NUMA levels above NODE. + */ +SD_FLAG(SD_NUMA, SDF_SHARED_PARENT) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 3e41c0401b5f..32f602ff37a0 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -12,19 +12,30 @@ #ifdef CONFIG_SMP /* Generate SD flag indexes */ -#define SD_FLAG(name) __##name, +#define SD_FLAG(name, mflags) __##name, enum { #include __SD_FLAG_CNT, }; #undef SD_FLAG /* Generate SD flag bits */ -#define SD_FLAG(name) name = 1 << __##name, +#define SD_FLAG(name, mflags) name = 1 << __##name, enum { #include }; #undef SD_FLAG +#ifdef CONFIG_SCHED_DEBUG +#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, +static const struct { + unsigned int meta_flags; + char *name; +} sd_flag_debug[] = { +#include +}; +#undef SD_FLAG +#endif + #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { -- cgit v1.2.3 From 4ee4ea443a5dc3fc4d8ae338199676eae9d8ef02 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:53 +0100 Subject: sched/topology: Introduce SD metaflag for flags needing > 1 groups In preparation of cleaning up the sd_degenerate*() functions, mark flags used in sd_degenerate() with the new SDF_NEEDS_GROUPS flag. With this, build a compile-time mask of those SD flags. Note that sd_parent_degenerate() uses an extra flag in its mask, SD_PREFER_SIBLING, which remains singled out for now. Suggested-by: Peter Zijlstra Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Reviewed-by: Dietmar Eggemann Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-8-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 39 ++++++++++++++++++++++++++++----------- include/linux/sched/topology.h | 7 +++++++ 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index c24a45b05fbb..ee5cbfc7189f 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -8,7 +8,7 @@ #endif /* - * Expected flag uses + * Hierarchical metaflags * * SHARED_CHILD: These flags are meant to be set from the base domain upwards. * If a domain has this flag set, all of its children should have it set. This @@ -29,29 +29,42 @@ * certain level (e.g. domain starts spanning CPUs outside of the base CPU's * socket). */ -#define SDF_SHARED_CHILD 0x1 -#define SDF_SHARED_PARENT 0x2 +#define SDF_SHARED_CHILD 0x1 +#define SDF_SHARED_PARENT 0x2 + +/* + * Behavioural metaflags + * + * NEEDS_GROUPS: These flags are only relevant if the domain they are set on has + * more than one group. This is usually for balancing flags (load balancing + * involves equalizing a metric between groups), or for flags describing some + * shared resource (which would be shared between groups). + */ +#define SDF_NEEDS_GROUPS 0x4 /* * Balance when about to become idle * * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level. + * NEEDS_GROUPS: Load balancing flag. */ -SD_FLAG(SD_BALANCE_NEWIDLE, SDF_SHARED_CHILD) +SD_FLAG(SD_BALANCE_NEWIDLE, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Balance on exec * * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + * NEEDS_GROUPS: Load balancing flag. */ -SD_FLAG(SD_BALANCE_EXEC, SDF_SHARED_CHILD) +SD_FLAG(SD_BALANCE_EXEC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Balance on fork, clone * * SHARED_CHILD: Set from the base domain up to the NUMA reclaim level. + * NEEDS_GROUPS: Load balancing flag. */ -SD_FLAG(SD_BALANCE_FORK, SDF_SHARED_CHILD) +SD_FLAG(SD_BALANCE_FORK, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Balance on wakeup @@ -69,24 +82,28 @@ SD_FLAG(SD_WAKE_AFFINE, SDF_SHARED_CHILD) /* * Domain members have different CPU capacities + * + * NEEDS_GROUPS: Per-CPU capacity is asymmetric between groups. */ -SD_FLAG(SD_ASYM_CPUCAPACITY, 0) +SD_FLAG(SD_ASYM_CPUCAPACITY, SDF_NEEDS_GROUPS) /* * Domain members share CPU capacity (i.e. SMT) * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share - * CPU capacity. + * CPU capacity. + * NEEDS_GROUPS: Capacity is shared between groups. */ -SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD) +SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Domain members share CPU package resources (i.e. caches) * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share - * the same cache(s). + * the same cache(s). + * NEEDS_GROUPS: Caches are shared between groups. */ -SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD) +SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Only a single load balancing instance diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 32f602ff37a0..2d59ca77103e 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -25,6 +25,13 @@ enum { }; #undef SD_FLAG +/* Generate a mask of SD flags with the SDF_NEEDS_GROUPS metaflag */ +#define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_NEEDS_GROUPS)) | +static const unsigned int SD_DEGENERATE_GROUPS_MASK = +#include +0; +#undef SD_FLAG + #ifdef CONFIG_SCHED_DEBUG #define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, static const struct { -- cgit v1.2.3 From c200191d4c2c1aa2ffb62a984b756ac1f02dc55c Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:56 +0100 Subject: sched/topology: Propagate SD_ASYM_CPUCAPACITY upwards We currently set this flag *only* on domains whose topology level exactly match the level where we detect asymmetry (as returned by asym_cpu_capacity_level()). This is rather problematic. Say there are two clusters in the system, one with a lone big CPU and the other with a mix of big and LITTLE CPUs (as is allowed by DynamIQ): DIE [ ] MC [ ][ ] 0 1 2 3 4 L L B B B asym_cpu_capacity_level() will figure out that the MC level is the one where all CPUs can see a CPU of max capacity, and we will thus set SD_ASYM_CPUCAPACITY at MC level for all CPUs. That lone big CPU will degenerate its MC domain, since it would be alone in there, and will end up with just a DIE domain. Since the flag was only set at MC, this CPU ends up not seeing any SD with the flag set, which is broken. Rather than clearing dflags at every topology level, clear it before entering the topology level loop. This will properly propagate upwards flags that are set starting from a certain level. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Reviewed-by: Quentin Perret Reviewed-by: Dietmar Eggemann Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-11-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index ee5cbfc7189f..40ad0d5c32e3 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -83,9 +83,11 @@ SD_FLAG(SD_WAKE_AFFINE, SDF_SHARED_CHILD) /* * Domain members have different CPU capacities * + * SHARED_PARENT: Set from the topmost domain down to the first domain where + * asymmetry is detected. * NEEDS_GROUPS: Per-CPU capacity is asymmetric between groups. */ -SD_FLAG(SD_ASYM_CPUCAPACITY, SDF_NEEDS_GROUPS) +SD_FLAG(SD_ASYM_CPUCAPACITY, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) /* * Domain members share CPU capacity (i.e. SMT) -- cgit v1.2.3 From 3a6712c7685352a5d71eaf459a4fddfc3589f018 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:57 +0100 Subject: sched/topology: Mark SD_PREFER_SIBLING as SDF_NEEDS_GROUPS SD_PREFER_SIBLING is currently considered in sd_parent_degenerate() but not in sd_degenerate(). It too hinges on load balancing, and thus won't have any effect when set on a domain with a single group. Add it to SD_DEGENERATE_GROUPS_MASK. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-12-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 40ad0d5c32e3..d28fe67c3098 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -131,8 +131,10 @@ SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD) * * Set up until domains start spanning NUMA nodes. Close to being a SHARED_CHILD * flag, but cleared below domains with SD_ASYM_CPUCAPACITY. + * + * NEEDS_GROUPS: Load balancing flag. */ -SD_FLAG(SD_PREFER_SIBLING, 0) +SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS) /* * sched_groups of this level overlap -- cgit v1.2.3 From 94b858fea1f2246a2fb7f7af21840fd14ced028f Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:58 +0100 Subject: sched/topology: Mark SD_BALANCE_WAKE as SDF_NEEDS_GROUPS Even if no mainline topology uses this flag, it is a load balancing flag just like SD_BALANCE_FORK and requires 2+ groups to have any effect. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-13-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index d28fe67c3098..729510a291b2 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -70,8 +70,9 @@ SD_FLAG(SD_BALANCE_FORK, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) * Balance on wakeup * * SHARED_CHILD: Set from the base domain up to cpuset.sched_relax_domain_level. + * NEEDS_GROUPS: Load balancing flag. */ -SD_FLAG(SD_BALANCE_WAKE, SDF_SHARED_CHILD) +SD_FLAG(SD_BALANCE_WAKE, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Consider waking task on waking CPU. -- cgit v1.2.3 From bdb7c802cc0a7e21f5223dc3ce41b7ac220c576e Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:29:59 +0100 Subject: sched/topology: Mark SD_SERIALIZE as SDF_NEEDS_GROUPS There would be no point in preserving a sched_domain with a single group just because it has this flag set. Add it to SD_DEGENERATE_GROUPS_MASK. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-14-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 729510a291b2..b7f4d80e338e 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -112,11 +112,12 @@ SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) * Only a single load balancing instance * * SHARED_PARENT: Set for all NUMA levels above NODE. Could be set from a - * different level upwards, but it doesn't change that if a domain has this flag - * set, then all of its parents need to have it too (otherwise the serialization - * doesn't make sense). + * different level upwards, but it doesn't change that if a + * domain has this flag set, then all of its parents need to have + * it too (otherwise the serialization doesn't make sense). + * NEEDS_GROUPS: No point in preserving domain if it has a single group. */ -SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT) +SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) /* * Place busy tasks earlier in the domain -- cgit v1.2.3 From 33199b0143daf4778d6301f966cb914d75f122eb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:30:00 +0100 Subject: sched/topology: Mark SD_ASYM_PACKING as SDF_NEEDS_GROUPS Being a load-balancing flag, it requires 2+ groups to have any effect. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-15-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index b7f4d80e338e..2998ece2c18d 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -123,10 +123,11 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) * Place busy tasks earlier in the domain * * SHARED_CHILD: Usually set on the SMT level. Technically could be set further - * up, but currently assumed to be set from the base domain upwards (see - * update_top_cache_domain()). + * up, but currently assumed to be set from the base domain + * upwards (see update_top_cache_domain()). + * NEEDS_GROUPS: Load balancing flag. */ -SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD) +SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Prefer to place tasks in a sibling domain -- cgit v1.2.3 From 3551e954f5d95faf3dbc340d422da7624658c230 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:30:01 +0100 Subject: sched/topology: Mark SD_OVERLAP as SDF_NEEDS_GROUPS A sched_domain can only have overlapping sched_groups if it has more than one group. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-16-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 2998ece2c18d..29af5f032861 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -143,8 +143,9 @@ SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS) * sched_groups of this level overlap * * SHARED_PARENT: Set for all NUMA levels above NODE. + * NEEDS_GROUPS: Overlaps can only exist with more than one group. */ -SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT) +SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) /* * Cross-node balancing -- cgit v1.2.3 From 5f4a1c4ea44728aa80be21dbf3a0469b5ca81d88 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 17 Aug 2020 12:30:02 +0100 Subject: sched/topology: Mark SD_NUMA as SDF_NEEDS_GROUPS There would be no point in preserving a sched_domain with a single group just because it has this flag set. Add it to SD_DEGENERATE_GROUPS_MASK. Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200817113003.20802-17-valentin.schneider@arm.com --- include/linux/sched/sd_flags.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 29af5f032861..34b21e971d77 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -151,5 +151,6 @@ SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) * Cross-node balancing * * SHARED_PARENT: Set for all NUMA levels above NODE. + * NEEDS_GROUPS: No point in preserving domain if it has a single group. */ -SD_FLAG(SD_NUMA, SDF_SHARED_PARENT) +SD_FLAG(SD_NUMA, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) -- cgit v1.2.3 From 709c4362725abb5fa1e36fd94893a9b0d049df82 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:29 +0000 Subject: cacheinfo: Move resctrl's get_cache_id() to the cacheinfo header file resctrl/core.c defines get_cache_id() for use in its cpu-hotplug callbacks. This gets the id attribute of the cache at the corresponding level of a CPU. Later rework means this private function needs to be shared. Move it to the header file. The name conflicts with a different definition in intel_cacheinfo.c, name it get_cpu_cacheinfo_id() to show its relation with get_cpu_cacheinfo(). Now this is visible on other architectures, check the id attribute has actually been set. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Babu Moger Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-11-james.morse@arm.com --- include/linux/cacheinfo.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 46b92cd61d0c..4f72b47973c3 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEINFO_H #include +#include #include #include @@ -119,4 +120,24 @@ int acpi_find_last_cache_level(unsigned int cpu); const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); +/* + * Get the id of the cache associated with @cpu at level @level. + * cpuhp lock must be held. + */ +static inline int get_cpu_cacheinfo_id(int cpu, int level) +{ + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); + int i; + + for (i = 0; i < ci->num_leaves; i++) { + if (ci->info_list[i].level == level) { + if (ci->info_list[i].attributes & CACHE_ID) + return ci->info_list[i].id; + return -1; + } + } + + return -1; +} + #endif /* _LINUX_CACHEINFO_H */ -- cgit v1.2.3 From 2024f91e965f7d7a38364874031c2132dcd11992 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 19 Aug 2020 13:47:15 +0200 Subject: ns: Add a common refcount into ns_common Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner [christian.brauner@ubuntu.com: rewrite commit & split into two patches] Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 5fbc4000358f..0f1d024bd958 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -2,12 +2,15 @@ #ifndef _LINUX_NS_COMMON_H #define _LINUX_NS_COMMON_H +#include + struct proc_ns_operations; struct ns_common { atomic_long_t stashed; const struct proc_ns_operations *ops; unsigned int inum; + refcount_t count; }; #endif -- cgit v1.2.3 From 9a56493f6942c0e2df1579986128721da96e00d8 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:21 +0300 Subject: uts: Use generic ns_common::count Switch over uts namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644978167.604812.1773586504374412107.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/utsname.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 44429d9142ca..2b1737c9b244 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -4,7 +4,6 @@ #include -#include #include #include #include @@ -22,7 +21,6 @@ struct user_namespace; extern struct user_namespace init_user_ns; struct uts_namespace { - struct kref kref; struct new_utsname name; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -33,16 +31,17 @@ extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS static inline void get_uts_ns(struct uts_namespace *ns) { - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); } extern struct uts_namespace *copy_utsname(unsigned long flags, struct user_namespace *user_ns, struct uts_namespace *old_ns); -extern void free_uts_ns(struct kref *kref); +extern void free_uts_ns(struct uts_namespace *ns); static inline void put_uts_ns(struct uts_namespace *ns) { - kref_put(&ns->kref, free_uts_ns); + if (refcount_dec_and_test(&ns->ns.count)) + free_uts_ns(ns); } void uts_ns_init(void); -- cgit v1.2.3 From 137ec390fad41928307216ea9f91acf5cf6f4204 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:27 +0300 Subject: ipc: Use generic ns_common::count Switch over ipc namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644978697.604812.16592754423881032385.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/ipc_namespace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index a06a78c67f19..05e22770af51 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -27,7 +27,6 @@ struct ipc_ids { }; struct ipc_namespace { - refcount_t count; struct ipc_ids ids[3]; int sem_ctls[4]; @@ -128,7 +127,7 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - refcount_inc(&ns->count); + refcount_inc(&ns->ns.count); return ns; } -- cgit v1.2.3 From 8eb71d95f34a009cc22084e05e78eb9686f7ea28 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:32 +0300 Subject: pid: Use generic ns_common::count Switch over pid namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644979226.604812.7512601754841882036.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/pid_namespace.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 5a5cb45ac57e..7c7e627503d2 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,6 @@ struct fs_pin; struct pid_namespace { - struct kref kref; struct idr idr; struct rcu_head rcu; unsigned int pid_allocated; @@ -43,7 +41,7 @@ extern struct pid_namespace init_pid_ns; static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); return ns; } -- cgit v1.2.3 From 265cbd62e034cb09a9da7cbff9072c8082f8df65 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:37 +0300 Subject: user: Use generic ns_common::count Switch over user namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644979754.604812.601625186726406922.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/user_namespace.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6ef1c7109fc4..64cf8ebdc4ec 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -57,7 +57,6 @@ struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; struct uid_gid_map projid_map; - atomic_t count; struct user_namespace *parent; int level; kuid_t owner; @@ -109,7 +108,7 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type); static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->ns.count); return ns; } @@ -119,7 +118,7 @@ extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { - if (ns && atomic_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->ns.count)) __put_user_ns(ns); } -- cgit v1.2.3 From f387882d8d3eda7c7b13e330c73907035569ce4a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:16:50 +0300 Subject: cgroup: Use generic ns_common::count Switch over cgroup namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644980994.604812.383801057081594972.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 618838c48313..451c2d26a5db 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -854,7 +854,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { - refcount_t count; struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -889,12 +888,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) - refcount_inc(&ns->count); + refcount_inc(&ns->ns.count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { - if (ns && refcount_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->ns.count)) free_cgroup_ns(ns); } -- cgit v1.2.3 From 28c41efd08bf97fc64f75304035ee3943995b68e Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 3 Aug 2020 13:17:00 +0300 Subject: time: Use generic ns_common::count Switch over time namespaces to use the newly introduced common lifetime counter. Currently every namespace type has its own lifetime counter which is stored in the specific namespace struct. The lifetime counters are used identically for all namespaces types. Namespaces may of course have additional unrelated counters and these are not altered. This introduces a common lifetime counter into struct ns_common. The ns_common struct encompasses information that all namespaces share. That should include the lifetime counter since its common for all of them. It also allows us to unify the type of the counters across all namespaces. Most of them use refcount_t but one uses atomic_t and at least one uses kref. Especially the last one doesn't make much sense since it's just a wrapper around refcount_t since 2016 and actually complicates cleanup operations by having to use container_of() to cast the correct namespace struct out of struct ns_common. Having the lifetime counter for the namespaces in one place reduces maintenance cost. Not just because after switching all namespaces over we will have removed more code than we added but also because the logic is more easily understandable and we indicate to the user that the basic lifetime requirements for all namespaces are currently identical. Signed-off-by: Kirill Tkhai Reviewed-by: Kees Cook Acked-by: Christian Brauner Link: https://lore.kernel.org/r/159644982033.604812.9406853013011123238.stgit@localhost.localdomain Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 5b6031385db0..a51ffc089219 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -4,7 +4,6 @@ #include -#include #include #include #include @@ -18,7 +17,6 @@ struct timens_offsets { }; struct time_namespace { - struct kref kref; struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; @@ -37,20 +35,21 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { - kref_get(&ns->kref); + refcount_inc(&ns->ns.count); return ns; } struct time_namespace *copy_time_ns(unsigned long flags, struct user_namespace *user_ns, struct time_namespace *old_ns); -void free_time_ns(struct kref *kref); +void free_time_ns(struct time_namespace *ns); int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); struct vdso_data *arch_get_vdso_data(void *vvar_page); static inline void put_time_ns(struct time_namespace *ns) { - kref_put(&ns->kref, free_time_ns); + if (refcount_dec_and_test(&ns->ns.count)) + free_time_ns(ns); } void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m); -- cgit v1.2.3 From b84e23f5135103c45022b0e4a4ed2459d5398a7e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 6 Aug 2020 20:20:23 +0200 Subject: ARM: s3c24xx: pass pointer to clk driver via platform data Passing pointers directly as platform data is fragile and undocumented. Better to create a platform data structure which explicitly documents what is passed to the driver. Suggested-by: Tomasz Figa Signed-off-by: Krzysztof Kozlowski Acked-by: Arnd Bergmann Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20200806182059.2431-6-krzk@kernel.org --- include/linux/platform_data/clk-s3c2410.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/platform_data/clk-s3c2410.h (limited to 'include/linux') diff --git a/include/linux/platform_data/clk-s3c2410.h b/include/linux/platform_data/clk-s3c2410.h new file mode 100644 index 000000000000..7eb1cfa5409b --- /dev/null +++ b/include/linux/platform_data/clk-s3c2410.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020 Krzysztof Kozlowski + */ + +#ifndef __LINUX_PLATFORM_DATA_CLK_S3C2410_H_ +#define __LINUX_PLATFORM_DATA_CLK_S3C2410_H_ + +/** + * struct s3c2410_clk_platform_data - platform data for S3C2410 clock driver + * + * @modify_misccr: Function to modify the MISCCR and return the new value + */ +struct s3c2410_clk_platform_data { + unsigned int (*modify_misccr)(unsigned int clr, unsigned int chg); +}; + +#endif /* __LINUX_PLATFORM_DATA_CLK_S3C2410_H_ */ + -- cgit v1.2.3 From 5f745424761a2a49762625e8616417a8e7694228 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:26 +0200 Subject: usb: gadget: s3c-hsudc: remove platform header dependency There is no real phy driver, so s3c-hsudc just pokes the registers itself. Improve this a little by making it a platform data callback like we do for gpios. There is only one board using this driver, and it's unlikely that another would be added, so this is a minimal workaround. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200806182059.2431-9-krzk@kernel.org [krzk: Include regs-s3c2443-clock.h in ifdef to fixup build on s3c6400] Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/s3c-hsudc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/s3c-hsudc.h b/include/linux/platform_data/s3c-hsudc.h index 4dc9b8760166..a170939832d5 100644 --- a/include/linux/platform_data/s3c-hsudc.h +++ b/include/linux/platform_data/s3c-hsudc.h @@ -26,6 +26,8 @@ struct s3c24xx_hsudc_platdata { unsigned int epnum; void (*gpio_init)(void); void (*gpio_uninit)(void); + void (*phy_init)(void); + void (*phy_uninit)(void); }; #endif /* __LINUX_USB_S3C_HSUDC_H */ -- cgit v1.2.3 From 17132da70eb766785b9b4677bacce18cc11ea442 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:33 +0200 Subject: ARM: samsung: move pm check code to drivers/soc This is the only part of plat-samsung that is really shared between the s3c and s5p ports. Moving it to drivers/soc/ lets us make them completely independent. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200806182059.2431-16-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/s3c-pm.h | 84 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 include/linux/soc/samsung/s3c-pm.h (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-pm.h b/include/linux/soc/samsung/s3c-pm.h new file mode 100644 index 000000000000..730bd1d3d09a --- /dev/null +++ b/include/linux/soc/samsung/s3c-pm.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2013 Samsung Electronics Co., Ltd. + * Tomasz Figa + * Copyright (c) 2004 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Written by Ben Dooks, + */ + +#ifndef __LINUX_SOC_SAMSUNG_S3C_PM_H +#define __LINUX_SOC_SAMSUNG_S3C_PM_H __FILE__ + +#include + +/* PM debug functions */ + +/** + * struct pm_uart_save - save block for core UART + * @ulcon: Save value for S3C2410_ULCON + * @ucon: Save value for S3C2410_UCON + * @ufcon: Save value for S3C2410_UFCON + * @umcon: Save value for S3C2410_UMCON + * @ubrdiv: Save value for S3C2410_UBRDIV + * + * Save block for UART registers to be held over sleep and restored if they + * are needed (say by debug). +*/ +struct pm_uart_save { + u32 ulcon; + u32 ucon; + u32 ufcon; + u32 umcon; + u32 ubrdiv; + u32 udivslot; +}; + +#ifdef CONFIG_SAMSUNG_PM_DEBUG +/** + * s3c_pm_dbg() - low level debug function for use in suspend/resume. + * @msg: The message to print. + * + * This function is used mainly to debug the resume process before the system + * can rely on printk/console output. It uses the low-level debugging output + * routine printascii() to do its work. + */ +extern void s3c_pm_dbg(const char *msg, ...); + +#define S3C_PMDBG(fmt...) s3c_pm_dbg(fmt) + +extern void s3c_pm_save_uarts(bool is_s3c24xx); +extern void s3c_pm_restore_uarts(bool is_s3c24xx); + +#ifdef CONFIG_ARCH_S3C64XX +extern void s3c_pm_arch_update_uart(void __iomem *regs, + struct pm_uart_save *save); +#else +static inline void +s3c_pm_arch_update_uart(void __iomem *regs, struct pm_uart_save *save) +{ +} +#endif + +#else +#define S3C_PMDBG(fmt...) pr_debug(fmt) + +static inline void s3c_pm_save_uarts(bool is_s3c24xx) { } +static inline void s3c_pm_restore_uarts(bool is_s3c24xx) { } +#endif + +/* suspend memory checking */ + +#ifdef CONFIG_SAMSUNG_PM_CHECK +extern void s3c_pm_check_prepare(void); +extern void s3c_pm_check_restore(void); +extern void s3c_pm_check_cleanup(void); +extern void s3c_pm_check_store(void); +#else +#define s3c_pm_check_prepare() do { } while (0) +#define s3c_pm_check_restore() do { } while (0) +#define s3c_pm_check_cleanup() do { } while (0) +#define s3c_pm_check_store() do { } while (0) +#endif + +#endif -- cgit v1.2.3 From 7dbad03ebcb924cde142f7477d65a54ffb1166a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:39 +0200 Subject: ARM: s3c: adc: move header to linux/soc/samsung There are multiple drivers using the private adc interface. It seems unlikely that they would ever get converted to iio, so make the current state official by making the header file global. The s3c2410_ts driver needs a couple of register definitions as well. Signed-off-by: Arnd Bergmann Acked-by: Guenter Roeck Acked-by: Dmitry Torokhov Acked-by: Sebastian Reichel Link: https://lore.kernel.org/r/20200806182059.2431-22-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/s3c-adc.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/soc/samsung/s3c-adc.h (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-adc.h b/include/linux/soc/samsung/s3c-adc.h new file mode 100644 index 000000000000..591c94ef957d --- /dev/null +++ b/include/linux/soc/samsung/s3c-adc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2008 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * S3C ADC driver information + */ + +#ifndef __LINUX_SOC_SAMSUNG_S3C_ADC_H +#define __LINUX_SOC_SAMSUNG_S3C_ADC_H __FILE__ + +struct s3c_adc_client; +struct platform_device; + +extern int s3c_adc_start(struct s3c_adc_client *client, + unsigned int channel, unsigned int nr_samples); + +extern int s3c_adc_read(struct s3c_adc_client *client, unsigned int ch); + +extern struct s3c_adc_client * + s3c_adc_register(struct platform_device *pdev, + void (*select)(struct s3c_adc_client *client, + unsigned selected), + void (*conv)(struct s3c_adc_client *client, + unsigned d0, unsigned d1, + unsigned *samples_left), + unsigned int is_ts); + +extern void s3c_adc_release(struct s3c_adc_client *client); + +#endif /* __LINUX_SOC_SAMSUNG_S3C_ADC_H */ -- cgit v1.2.3 From f131a4443ea468cd532410c271c229bb39caab08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Sep 2019 11:31:09 +0200 Subject: ARM: s3c24xx: move spi fiq handler into platform The fiq handler needs access to some register definitions that should not be used directly by device drivers. Since this is closely related to the irqchip driver anyway, move it into the same place. Signed-off-by: Arnd Bergmann [krzk: Add a header guard in include/linux/spi/s3c24xx-fiq.h, fix SPDX comment style, update maintainer's entry] Co-developed-by: Krzysztof Kozlowski Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200806182059.2431-23-krzk%40kernel.org Acked-by: Mark Brown --- include/linux/spi/s3c24xx-fiq.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/linux/spi/s3c24xx-fiq.h (limited to 'include/linux') diff --git a/include/linux/spi/s3c24xx-fiq.h b/include/linux/spi/s3c24xx-fiq.h new file mode 100644 index 000000000000..d2842ac1de27 --- /dev/null +++ b/include/linux/spi/s3c24xx-fiq.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* linux/drivers/spi/spi_s3c24xx_fiq.h + * + * Copyright 2009 Simtec Electronics + * Ben Dooks + * + * S3C24XX SPI - FIQ pseudo-DMA transfer support +*/ + +#ifndef __LINUX_SPI_S3C24XX_FIQ_H +#define __LINUX_SPI_S3C24XX_FIQ_H __FILE__ + +/* We have R8 through R13 to play with */ + +#ifdef __ASSEMBLY__ +#define __REG_NR(x) r##x +#else + +extern struct spi_fiq_code s3c24xx_spi_fiq_txrx; +extern struct spi_fiq_code s3c24xx_spi_fiq_tx; +extern struct spi_fiq_code s3c24xx_spi_fiq_rx; + +#define __REG_NR(x) (x) +#endif + +#define fiq_rspi __REG_NR(8) +#define fiq_rtmp __REG_NR(9) +#define fiq_rrx __REG_NR(10) +#define fiq_rtx __REG_NR(11) +#define fiq_rcount __REG_NR(12) +#define fiq_rirq __REG_NR(13) + +#endif /* __LINUX_SPI_S3C24XX_FIQ_H */ -- cgit v1.2.3 From b558b6c24068d87ffcd95dad3bf0d9bd2ac290e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 18 Aug 2020 18:07:09 -0700 Subject: net-tun: Add type safety to tun_xdp_to_ptr() and tun_ptr_to_xdp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reduces likelihood of incorrect use. Test: builds Signed-off-by: Maciej Żenczykowski Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819010710.3959310-1-zenczykowski@gmail.com --- include/linux/if_tun.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 5bda8cf457b6..6c37e1dbc5df 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -28,8 +28,8 @@ struct tun_xdp_hdr { struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); bool tun_is_xdp_frame(void *ptr); -void *tun_xdp_to_ptr(void *ptr); -void *tun_ptr_to_xdp(void *ptr); +void *tun_xdp_to_ptr(struct xdp_frame *xdp); +struct xdp_frame *tun_ptr_to_xdp(void *ptr); void tun_ptr_free(void *ptr); #else #include @@ -48,11 +48,11 @@ static inline bool tun_is_xdp_frame(void *ptr) { return false; } -static inline void *tun_xdp_to_ptr(void *ptr) +static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { return NULL; } -static inline void *tun_ptr_to_xdp(void *ptr) +static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { return NULL; } -- cgit v1.2.3 From 596b5ef458f903d4362fb3c69967b6d8a23334bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 18 Aug 2020 18:07:10 -0700 Subject: net-tun: Eliminate two tun/xdp related function calls from vhost-net MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides a minor performance boost by virtue of inlining instead of cross module function calls. Test: builds Signed-off-by: Maciej Żenczykowski Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200819010710.3959310-2-zenczykowski@gmail.com --- include/linux/if_tun.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 6c37e1dbc5df..2a7660843444 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -27,9 +27,18 @@ struct tun_xdp_hdr { #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); -bool tun_is_xdp_frame(void *ptr); -void *tun_xdp_to_ptr(struct xdp_frame *xdp); -struct xdp_frame *tun_ptr_to_xdp(void *ptr); +static inline bool tun_is_xdp_frame(void *ptr) +{ + return (unsigned long)ptr & TUN_XDP_FLAG; +} +static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) +{ + return (void *)((unsigned long)xdp | TUN_XDP_FLAG); +} +static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) +{ + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); +} void tun_ptr_free(void *ptr); #else #include -- cgit v1.2.3 From bdfbb63c314a6fb7d27dddd62f5a3e4f062b92bb Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 18 Aug 2020 12:32:43 +0200 Subject: ptp: Add generic ptp v2 header parsing function Reason: A lot of the ptp drivers - which implement hardware time stamping - need specific fields such as the sequence id from the ptp v2 header. Currently all drivers implement that themselves. Introduce a generic function to retrieve a pointer to the start of the ptp v2 header. Suggested-by: Russell King Signed-off-by: Kurt Kanzenbach Reviewed-by: Richard Cochran Reviewed-by: Florian Fainelli Reviewed-by: Russell King Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index dd00fa41f7e7..0a9cc0eb0801 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -44,6 +44,30 @@ #define OFF_IHL 14 #define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) +struct clock_identity { + u8 id[8]; +} __packed; + +struct port_identity { + struct clock_identity clock_identity; + __be16 port_number; +} __packed; + +struct ptp_header { + u8 tsmt; /* transportSpecific | messageType */ + u8 ver; /* reserved | versionPTP */ + __be16 message_length; + u8 domain_number; + u8 reserved1; + u8 flag_field[2]; + __be64 correction; + __be32 reserved2; + struct port_identity source_port_identity; + __be16 sequence_id; + u8 control; + u8 log_message_interval; +} __packed; + #if defined(CONFIG_NET_PTP_CLASSIFY) /** * ptp_classify_raw - classify a PTP packet @@ -57,6 +81,21 @@ */ unsigned int ptp_classify_raw(const struct sk_buff *skb); +/** + * ptp_parse_header - Get pointer to the PTP v2 header + * @skb: packet buffer + * @type: type of the packet (see ptp_classify_raw()) + * + * This function takes care of the VLAN, UDP, IPv4 and IPv6 headers. The length + * is checked. + * + * Note, internally skb_mac_header() is used. Make sure that the @skb is + * initialized accordingly. + * + * Return: Pointer to the ptp v2 header or NULL if not found + */ +struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type); + void __init ptp_classifier_init(void); #else static inline void ptp_classifier_init(void) @@ -66,5 +105,10 @@ static inline unsigned int ptp_classify_raw(struct sk_buff *skb) { return PTP_CLASS_NONE; } +static inline struct ptp_header *ptp_parse_header(struct sk_buff *skb, + unsigned int type) +{ + return NULL; +} #endif #endif /* _PTP_CLASSIFY_H_ */ -- cgit v1.2.3 From 036c508ba95e573f53bd5bbb79b0c4d71003b319 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 18 Aug 2020 12:32:44 +0200 Subject: ptp: Add generic ptp message type function The message type is located at different offsets within the ptp header depending on the ptp version (v1 or v2). Therefore, drivers which also deal with ptp v1 have some code for it. Extract this into a helper function for drivers to be used. Signed-off-by: Kurt Kanzenbach Reviewed-by: Richard Cochran Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 0a9cc0eb0801..cfc6d9152f69 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -96,6 +96,31 @@ unsigned int ptp_classify_raw(const struct sk_buff *skb); */ struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type); +/** + * ptp_get_msgtype - Extract ptp message type from given header + * @hdr: ptp header + * @type: type of the packet (see ptp_classify_raw()) + * + * This function returns the message type for a given ptp header. It takes care + * of the different ptp header versions (v1 or v2). + * + * Return: The message type + */ +static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, + unsigned int type) +{ + u8 msgtype; + + if (unlikely(type & PTP_CLASS_V1)) { + /* msg type is located at the control field for ptp v1 */ + msgtype = hdr->control; + } else { + msgtype = hdr->tsmt & 0x0f; + } + + return msgtype; +} + void __init ptp_classifier_init(void); #else static inline void ptp_classifier_init(void) -- cgit v1.2.3 From 17060fb5069f2c73a566015ce6e019d5685680b5 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 18 Aug 2020 12:32:51 +0200 Subject: ptp: Remove unused macro The offset for the control field is not needed anymore. Remove it. Signed-off-by: Kurt Kanzenbach Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index cfc6d9152f69..8437307cca8c 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -36,7 +36,6 @@ #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ #define OFF_PTP_SEQUENCE_ID 30 -#define OFF_PTP_CONTROL 32 /* PTPv1 only */ /* Below defines should actually be removed at some point in time. */ #define IP6_HLEN 40 -- cgit v1.2.3 From cad6967ac10843a70842cd39c7b53412901dd21f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 19 Aug 2020 12:46:45 +0200 Subject: fork: introduce kernel_clone() The old _do_fork() helper doesn't follow naming conventions of in-kernel helpers for syscalls. The process creation cleanup in [1] didn't change the name to something more reasonable mainly because _do_fork() was used in quite a few places. So sending this as a separate series seemed the better strategy. This commit does two things: 1. renames _do_fork() to kernel_clone() but keeps _do_fork() as a simple static inline wrapper around kernel_clone(). 2. Changes the return type from long to pid_t. This aligns kernel_thread() and kernel_clone(). Also, the return value from kernel_clone that is surfaced in fork(), vfork(), clone(), and clone3() is taken from pid_vrn() which returns a pid_t too. Follow-up patches will switch each caller of _do_fork() and each place where it is referenced over to kernel_clone(). After all these changes are done, we can remove _do_fork() completely and will only be left with kernel_clone(). [1]: 9ba27414f2ec ("Merge tag 'fork-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux") Signed-off-by: Christian Brauner Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Matthew Wilcox (Oracle) Cc: "Peter Zijlstra (Intel)" Link: https://lore.kernel.org/r/20200819104655.436656-2-christian.brauner@ubuntu.com --- include/linux/sched/task.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a98965007eef..d4428039c3c1 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -83,7 +83,11 @@ extern void do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern long _do_fork(struct kernel_clone_args *kargs); +extern pid_t kernel_clone(struct kernel_clone_args *kargs); +static inline long _do_fork(struct kernel_clone_args *kargs) +{ + return kernel_clone(kargs); +} struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -- cgit v1.2.3 From 06fe45634942dc96c316bbb789049a4b0b692542 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 19 Aug 2020 12:46:55 +0200 Subject: sched: remove _do_fork() Now that all callers of _do_fork() have been switched to kernel_clone() remove the _do_fork() helper. Signed-off-by: Christian Brauner Link: https://lore.kernel.org/r/20200819104655.436656-12-christian.brauner@ubuntu.com --- include/linux/sched/task.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index d4428039c3c1..85fb2f34c59b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -84,10 +84,6 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); -static inline long _do_fork(struct kernel_clone_args *kargs) -{ - return kernel_clone(kargs); -} struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -- cgit v1.2.3 From 005142b8a1f0f32d33fbe04b728464c1b7acfa0e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 18 Aug 2020 21:27:56 -0700 Subject: bpf: Factor out bpf_link_by_id() helper. Refactor the code a bit to extract bpf_link_by_id() helper. It's similar to existing bpf_prog_by_id(). Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200819042759.51280-2-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 55f694b63164..a9b7185a6b37 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1358,6 +1358,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); struct bpf_prog *bpf_prog_by_id(u32 id); +struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); #else /* !CONFIG_BPF_SYSCALL */ -- cgit v1.2.3 From f67f6c00c7f367fe90f2bc01b9a977aa13de870e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:46 +0200 Subject: ARM: s3c24xx: move s3cmci pinctrl handling into board files Rather than call the internal s3c_gpio_cfgall_range() function through a platform header, move the code into the set_power callback that is already exported by the board, and add a default implementation. In DT mode, the code already does not set the pin config, so nothing changes there. Signed-off-by: Arnd Bergmann Acked-by: Ulf Hansson Link: https://lore.kernel.org/r/20200806182059.2431-29-krzk@kernel.org [krzk: Rebase and correct set_power in mach-h1940.c] Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/mmc-s3cmci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-s3cmci.h b/include/linux/platform_data/mmc-s3cmci.h index 33310b11cbdd..bacb86db3112 100644 --- a/include/linux/platform_data/mmc-s3cmci.h +++ b/include/linux/platform_data/mmc-s3cmci.h @@ -35,6 +35,7 @@ struct s3c24xx_mci_pdata { unsigned long ocr_avail; void (*set_power)(unsigned char power_mode, unsigned short vdd); + struct gpio_desc *bus[6]; }; /** @@ -44,6 +45,7 @@ struct s3c24xx_mci_pdata { * Copy the platform data supplied by @pdata so that this can be marked * __initdata. */ +extern void s3c24xx_mci_def_set_power(unsigned char power_mode, unsigned short vdd); extern void s3c24xx_mci_set_platdata(struct s3c24xx_mci_pdata *pdata); #endif /* _ARCH_NCI_H */ -- cgit v1.2.3 From cd4bd8f9435ddf08a8677f56abf418423f223959 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:48 +0200 Subject: ARM: s3c24xx: spi: avoid hardcoding fiq number in driver The IRQ_EINT0 constant is a platform detail that is defined in mach/irqs.h and not visible to drivers once that header is made private. Since the same calculation already happens in s3c24xx_set_fiq, just return the value from there. Signed-off-by: Arnd Bergmann Acked-by: Mark Brown Link: https://lore.kernel.org/r/20200806182059.2431-31-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski --- include/linux/spi/s3c24xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/s3c24xx.h b/include/linux/spi/s3c24xx.h index c91d10b82f08..440a71593162 100644 --- a/include/linux/spi/s3c24xx.h +++ b/include/linux/spi/s3c24xx.h @@ -20,6 +20,6 @@ struct s3c2410_spi_info { void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol); }; -extern int s3c24xx_set_fiq(unsigned int irq, bool on); +extern int s3c24xx_set_fiq(unsigned int irq, u32 *ack_ptr, bool on); #endif /* __LINUX_SPI_S3C24XX_H */ -- cgit v1.2.3 From 81994e0ffc373e67ace4c98797c35f8213f07753 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Sep 2019 22:33:24 +0200 Subject: fbdev: s3c2410fb: remove mach header dependency The s3c2410fb driver is too deeply intertwined with the s3c24xx platform code. Change it in a way that avoids the use of platform header files but having all interface data in a platform_data header, and the private register definitions next to the driver itself. One ugly bit here is that the driver pokes directly into gpio registers, which are owned by another driver. Passing the mapped addresses in platform_data is somewhat suboptimal, but it is a small improvement over the previous version. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200806182059.2431-33-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski --- include/linux/platform_data/fb-s3c2410.h | 99 ++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 include/linux/platform_data/fb-s3c2410.h (limited to 'include/linux') diff --git a/include/linux/platform_data/fb-s3c2410.h b/include/linux/platform_data/fb-s3c2410.h new file mode 100644 index 000000000000..10c11e6316d6 --- /dev/null +++ b/include/linux/platform_data/fb-s3c2410.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2004 Arnaud Patard + * + * Inspired by pxafb.h +*/ + +#ifndef __ASM_PLAT_FB_S3C2410_H +#define __ASM_PLAT_FB_S3C2410_H __FILE__ + +#include + +struct s3c2410fb_hw { + unsigned long lcdcon1; + unsigned long lcdcon2; + unsigned long lcdcon3; + unsigned long lcdcon4; + unsigned long lcdcon5; +}; + +/* LCD description */ +struct s3c2410fb_display { + /* LCD type */ + unsigned type; +#define S3C2410_LCDCON1_DSCAN4 (0<<5) +#define S3C2410_LCDCON1_STN4 (1<<5) +#define S3C2410_LCDCON1_STN8 (2<<5) +#define S3C2410_LCDCON1_TFT (3<<5) + +#define S3C2410_LCDCON1_TFT1BPP (8<<1) +#define S3C2410_LCDCON1_TFT2BPP (9<<1) +#define S3C2410_LCDCON1_TFT4BPP (10<<1) +#define S3C2410_LCDCON1_TFT8BPP (11<<1) +#define S3C2410_LCDCON1_TFT16BPP (12<<1) +#define S3C2410_LCDCON1_TFT24BPP (13<<1) + + /* Screen size */ + unsigned short width; + unsigned short height; + + /* Screen info */ + unsigned short xres; + unsigned short yres; + unsigned short bpp; + + unsigned pixclock; /* pixclock in picoseconds */ + unsigned short left_margin; /* value in pixels (TFT) or HCLKs (STN) */ + unsigned short right_margin; /* value in pixels (TFT) or HCLKs (STN) */ + unsigned short hsync_len; /* value in pixels (TFT) or HCLKs (STN) */ + unsigned short upper_margin; /* value in lines (TFT) or 0 (STN) */ + unsigned short lower_margin; /* value in lines (TFT) or 0 (STN) */ + unsigned short vsync_len; /* value in lines (TFT) or 0 (STN) */ + + /* lcd configuration registers */ + unsigned long lcdcon5; +#define S3C2410_LCDCON5_BPP24BL (1<<12) +#define S3C2410_LCDCON5_FRM565 (1<<11) +#define S3C2410_LCDCON5_INVVCLK (1<<10) +#define S3C2410_LCDCON5_INVVLINE (1<<9) +#define S3C2410_LCDCON5_INVVFRAME (1<<8) +#define S3C2410_LCDCON5_INVVD (1<<7) +#define S3C2410_LCDCON5_INVVDEN (1<<6) +#define S3C2410_LCDCON5_INVPWREN (1<<5) +#define S3C2410_LCDCON5_INVLEND (1<<4) +#define S3C2410_LCDCON5_PWREN (1<<3) +#define S3C2410_LCDCON5_ENLEND (1<<2) +#define S3C2410_LCDCON5_BSWP (1<<1) +#define S3C2410_LCDCON5_HWSWP (1<<0) +}; + +struct s3c2410fb_mach_info { + + struct s3c2410fb_display *displays; /* attached displays info */ + unsigned num_displays; /* number of defined displays */ + unsigned default_display; + + /* GPIOs */ + + unsigned long gpcup; + unsigned long gpcup_mask; + unsigned long gpccon; + unsigned long gpccon_mask; + unsigned long gpdup; + unsigned long gpdup_mask; + unsigned long gpdcon; + unsigned long gpdcon_mask; + + void __iomem * gpccon_reg; + void __iomem * gpcup_reg; + void __iomem * gpdcon_reg; + void __iomem * gpdup_reg; + + /* lpc3600 control register */ + unsigned long lpcsel; +}; + +extern void s3c24xx_fb_set_platdata(struct s3c2410fb_mach_info *); + +#endif /* __ASM_PLAT_FB_S3C2410_H */ -- cgit v1.2.3 From 81b11a6a09964cfea4c525d22548790a1d92d38f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:52 +0200 Subject: ARM: s3c: remove cpufreq header dependencies The cpufreq drivers are split between the machine directory and the drivers/cpufreq directory. In order to share header files after we convert s3c to multiplatform, those headers have to live in a different global location. Move them to linux/soc/samsung/ in lack of a better place. Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20200806182059.2431-35-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/s3c-cpu-freq.h | 145 +++++++++++++ include/linux/soc/samsung/s3c-cpufreq-core.h | 291 +++++++++++++++++++++++++++ include/linux/soc/samsung/s3c-pm.h | 10 + 3 files changed, 446 insertions(+) create mode 100644 include/linux/soc/samsung/s3c-cpu-freq.h create mode 100644 include/linux/soc/samsung/s3c-cpufreq-core.h (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-cpu-freq.h b/include/linux/soc/samsung/s3c-cpu-freq.h new file mode 100644 index 000000000000..63e88fd5dea2 --- /dev/null +++ b/include/linux/soc/samsung/s3c-cpu-freq.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2006-2007 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * S3C CPU frequency scaling support - driver and board + */ +#ifndef __LINUX_SOC_SAMSUNG_S3C_CPU_FREQ_H +#define __LINUX_SOC_SAMSUNG_S3C_CPU_FREQ_H + +#include + +struct s3c_cpufreq_info; +struct s3c_cpufreq_board; +struct s3c_iotimings; + +/** + * struct s3c_freq - frequency information (mainly for core drivers) + * @fclk: The FCLK frequency in Hz. + * @armclk: The ARMCLK frequency in Hz. + * @hclk_tns: HCLK cycle time in 10ths of nano-seconds. + * @hclk: The HCLK frequency in Hz. + * @pclk: The PCLK frequency in Hz. + * + * This contains the frequency information about the current configuration + * mainly for the core drivers to ensure we do not end up passing about + * a large number of parameters. + * + * The @hclk_tns field is a useful cache for the parts of the drivers that + * need to calculate IO timings and suchlike. + */ +struct s3c_freq { + unsigned long fclk; + unsigned long armclk; + unsigned long hclk_tns; /* in 10ths of ns */ + unsigned long hclk; + unsigned long pclk; +}; + +/** + * struct s3c_cpufreq_freqs - s3c cpufreq notification information. + * @freqs: The cpufreq setting information. + * @old: The old clock settings. + * @new: The new clock settings. + * @pll_changing: Set if the PLL is changing. + * + * Wrapper 'struct cpufreq_freqs' so that any drivers receiving the + * notification can use this information that is not provided by just + * having the core frequency alone. + * + * The pll_changing flag is used to indicate if the PLL itself is + * being set during this change. This is important as the clocks + * will temporarily be set to the XTAL clock during this time, so + * drivers may want to close down their output during this time. + * + * Note, this is not being used by any current drivers and therefore + * may be removed in the future. + */ +struct s3c_cpufreq_freqs { + struct cpufreq_freqs freqs; + struct s3c_freq old; + struct s3c_freq new; + + unsigned int pll_changing:1; +}; + +#define to_s3c_cpufreq(_cf) container_of(_cf, struct s3c_cpufreq_freqs, freqs) + +/** + * struct s3c_clkdivs - clock divisor information + * @p_divisor: Divisor from FCLK to PCLK. + * @h_divisor: Divisor from FCLK to HCLK. + * @arm_divisor: Divisor from FCLK to ARMCLK (not all CPUs). + * @dvs: Non-zero if using DVS mode for ARMCLK. + * + * Divisor settings for the core clocks. + */ +struct s3c_clkdivs { + int p_divisor; + int h_divisor; + int arm_divisor; + unsigned char dvs; +}; + +#define PLLVAL(_m, _p, _s) (((_m) << 12) | ((_p) << 4) | (_s)) + +/** + * struct s3c_pllval - PLL value entry. + * @freq: The frequency for this entry in Hz. + * @pll_reg: The PLL register setting for this PLL value. + */ +struct s3c_pllval { + unsigned long freq; + unsigned long pll_reg; +}; + +/** + * struct s3c_cpufreq_board - per-board cpu frequency informatin + * @refresh: The SDRAM refresh period in nanoseconds. + * @auto_io: Set if the IO timing settings should be generated from the + * initialisation time hardware registers. + * @need_io: Set if the board has external IO on any of the chipselect + * lines that will require the hardware timing registers to be + * updated on a clock change. + * @max: The maxium frequency limits for the system. Any field that + * is left at zero will use the CPU's settings. + * + * This contains the board specific settings that affect how the CPU + * drivers chose settings. These include the memory refresh and IO + * timing information. + * + * Registration depends on the driver being used, the ARMCLK only + * implementation does not currently need this but the older style + * driver requires this to be available. + */ +struct s3c_cpufreq_board { + unsigned int refresh; + unsigned int auto_io:1; /* automatically init io timings. */ + unsigned int need_io:1; /* set if needs io timing support. */ + + /* any non-zero field in here is taken as an upper limit. */ + struct s3c_freq max; /* frequency limits */ +}; + +/* Things depending on frequency scaling. */ +#ifdef CONFIG_ARM_S3C_CPUFREQ +#define __init_or_cpufreq +#else +#define __init_or_cpufreq __init +#endif + +/* Board functions */ + +#ifdef CONFIG_ARM_S3C_CPUFREQ +extern int s3c_cpufreq_setboard(struct s3c_cpufreq_board *board); +#else + +static inline int s3c_cpufreq_setboard(struct s3c_cpufreq_board *board) +{ + return 0; +} +#endif /* CONFIG_ARM_S3C_CPUFREQ */ + +#endif diff --git a/include/linux/soc/samsung/s3c-cpufreq-core.h b/include/linux/soc/samsung/s3c-cpufreq-core.h new file mode 100644 index 000000000000..c578b07ccd5d --- /dev/null +++ b/include/linux/soc/samsung/s3c-cpufreq-core.h @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2006-2009 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * S3C CPU frequency scaling support - core support + */ +#ifndef __LINUX_SOC_SAMSUNG_S3C_CPUFREQ_CORE_H +#define __LINUX_SOC_SAMSUNG_S3C_CPUFREQ_CORE_H + +#include + +struct seq_file; + +#define MAX_BANKS (8) +#define S3C2412_MAX_IO (8) + +/** + * struct s3c2410_iobank_timing - IO bank timings for S3C2410 style timings + * @bankcon: The cached version of settings in this structure. + * @tacp: + * @tacs: Time from address valid to nCS asserted. + * @tcos: Time from nCS asserted to nOE or nWE asserted. + * @tacc: Time that nOE or nWE is asserted. + * @tcoh: Time nCS is held after nOE or nWE are released. + * @tcah: Time address is held for after + * @nwait_en: Whether nWAIT is enabled for this bank. + * + * This structure represents the IO timings for a S3C2410 style IO bank + * used by the CPU frequency support if it needs to change the settings + * of the IO. + */ +struct s3c2410_iobank_timing { + unsigned long bankcon; + unsigned int tacp; + unsigned int tacs; + unsigned int tcos; + unsigned int tacc; + unsigned int tcoh; /* nCS hold after nOE/nWE */ + unsigned int tcah; /* Address hold after nCS */ + unsigned char nwait_en; /* nWait enabled for bank. */ +}; + +/** + * struct s3c2412_iobank_timing - io timings for PL092 (S3C2412) style IO + * @idcy: The idle cycle time between transactions. + * @wstrd: nCS release to end of read cycle. + * @wstwr: nCS release to end of write cycle. + * @wstoen: nCS assertion to nOE assertion time. + * @wstwen: nCS assertion to nWE assertion time. + * @wstbrd: Burst ready delay. + * @smbidcyr: Register cache for smbidcyr value. + * @smbwstrd: Register cache for smbwstrd value. + * @smbwstwr: Register cache for smbwstwr value. + * @smbwstoen: Register cache for smbwstoen value. + * @smbwstwen: Register cache for smbwstwen value. + * @smbwstbrd: Register cache for smbwstbrd value. + * + * Timing information for a IO bank on an S3C2412 or similar system which + * uses a PL093 block. + */ +struct s3c2412_iobank_timing { + unsigned int idcy; + unsigned int wstrd; + unsigned int wstwr; + unsigned int wstoen; + unsigned int wstwen; + unsigned int wstbrd; + + /* register cache */ + unsigned char smbidcyr; + unsigned char smbwstrd; + unsigned char smbwstwr; + unsigned char smbwstoen; + unsigned char smbwstwen; + unsigned char smbwstbrd; +}; + +union s3c_iobank { + struct s3c2410_iobank_timing *io_2410; + struct s3c2412_iobank_timing *io_2412; +}; + +/** + * struct s3c_iotimings - Chip IO timings holder + * @bank: The timings for each IO bank. + */ +struct s3c_iotimings { + union s3c_iobank bank[MAX_BANKS]; +}; + +/** + * struct s3c_plltab - PLL table information. + * @vals: List of PLL values. + * @size: Size of the PLL table @vals. + */ +struct s3c_plltab { + struct s3c_pllval *vals; + int size; +}; + +/** + * struct s3c_cpufreq_config - current cpu frequency configuration + * @freq: The current settings for the core clocks. + * @max: Maxium settings, derived from core, board and user settings. + * @pll: The PLL table entry for the current PLL settings. + * @divs: The divisor settings for the core clocks. + * @info: The current core driver information. + * @board: The information for the board we are running on. + * @lock_pll: Set if the PLL settings cannot be changed. + * + * This is for the core drivers that need to know information about + * the current settings and values. It should not be needed by any + * device drivers. +*/ +struct s3c_cpufreq_config { + struct s3c_freq freq; + struct s3c_freq max; + struct clk *mpll; + struct cpufreq_frequency_table pll; + struct s3c_clkdivs divs; + struct s3c_cpufreq_info *info; /* for core, not drivers */ + struct s3c_cpufreq_board *board; + + unsigned int lock_pll:1; +}; + +/** + * struct s3c_cpufreq_info - Information for the CPU frequency driver. + * @name: The name of this implementation. + * @max: The maximum frequencies for the system. + * @latency: Transition latency to give to cpufreq. + * @locktime_m: The lock-time in uS for the MPLL. + * @locktime_u: The lock-time in uS for the UPLL. + * @locttime_bits: The number of bits each LOCKTIME field. + * @need_pll: Set if this driver needs to change the PLL values to achieve + * any frequency changes. This is really only need by devices like the + * S3C2410 where there is no or limited divider between the PLL and the + * ARMCLK. + * @get_iotiming: Get the current IO timing data, mainly for use at start. + * @set_iotiming: Update the IO timings from the cached copies calculated + * from the @calc_iotiming entry when changing the frequency. + * @calc_iotiming: Calculate and update the cached copies of the IO timings + * from the newly calculated frequencies. + * @calc_freqtable: Calculate (fill in) the given frequency table from the + * current frequency configuration. If the table passed in is NULL, + * then the return is the number of elements to be filled for allocation + * of the table. + * @set_refresh: Set the memory refresh configuration. + * @set_fvco: Set the PLL frequencies. + * @set_divs: Update the clock divisors. + * @calc_divs: Calculate the clock divisors. + */ +struct s3c_cpufreq_info { + const char *name; + struct s3c_freq max; + + unsigned int latency; + + unsigned int locktime_m; + unsigned int locktime_u; + unsigned char locktime_bits; + + unsigned int need_pll:1; + + /* driver routines */ + + int (*get_iotiming)(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *timings); + + void (*set_iotiming)(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *timings); + + int (*calc_iotiming)(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *timings); + + int (*calc_freqtable)(struct s3c_cpufreq_config *cfg, + struct cpufreq_frequency_table *t, + size_t table_size); + + void (*debug_io_show)(struct seq_file *seq, + struct s3c_cpufreq_config *cfg, + union s3c_iobank *iob); + + void (*set_refresh)(struct s3c_cpufreq_config *cfg); + void (*set_fvco)(struct s3c_cpufreq_config *cfg); + void (*set_divs)(struct s3c_cpufreq_config *cfg); + int (*calc_divs)(struct s3c_cpufreq_config *cfg); +}; + +extern int s3c_cpufreq_register(struct s3c_cpufreq_info *info); + +extern int s3c_plltab_register(struct cpufreq_frequency_table *plls, + unsigned int plls_no); + +/* exports and utilities for debugfs */ +extern struct s3c_cpufreq_config *s3c_cpufreq_getconfig(void); +extern struct s3c_iotimings *s3c_cpufreq_getiotimings(void); + +#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS +#define s3c_cpufreq_debugfs_call(x) x +#else +#define s3c_cpufreq_debugfs_call(x) NULL +#endif + +/* Useful utility functions. */ + +extern struct clk *s3c_cpufreq_clk_get(struct device *, const char *); + +/* S3C2410 and compatible exported functions */ + +extern void s3c2410_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg); +extern void s3c2410_set_fvco(struct s3c_cpufreq_config *cfg); + +#ifdef CONFIG_S3C2410_IOTIMING +extern void s3c2410_iotiming_debugfs(struct seq_file *seq, + struct s3c_cpufreq_config *cfg, + union s3c_iobank *iob); + +extern int s3c2410_iotiming_calc(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *iot); + +extern int s3c2410_iotiming_get(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *timings); + +extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *iot); +#else +#define s3c2410_iotiming_debugfs NULL +#define s3c2410_iotiming_calc NULL +#define s3c2410_iotiming_get NULL +#define s3c2410_iotiming_set NULL +#endif /* CONFIG_S3C2410_IOTIMING */ + +/* S3C2412 compatible routines */ + +#ifdef CONFIG_S3C2412_IOTIMING +extern void s3c2412_iotiming_debugfs(struct seq_file *seq, + struct s3c_cpufreq_config *cfg, + union s3c_iobank *iob); + +extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *timings); + +extern int s3c2412_iotiming_calc(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *iot); + +extern void s3c2412_iotiming_set(struct s3c_cpufreq_config *cfg, + struct s3c_iotimings *iot); +#else +#define s3c2412_iotiming_debugfs NULL +#define s3c2412_iotiming_calc NULL +#define s3c2412_iotiming_get NULL +#define s3c2412_iotiming_set NULL +#endif /* CONFIG_S3C2412_IOTIMING */ + +#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_DEBUG +#define s3c_freq_dbg(x...) printk(KERN_INFO x) +#else +#define s3c_freq_dbg(x...) do { if (0) printk(x); } while (0) +#endif /* CONFIG_ARM_S3C24XX_CPUFREQ_DEBUG */ + +#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_IODEBUG +#define s3c_freq_iodbg(x...) printk(KERN_INFO x) +#else +#define s3c_freq_iodbg(x...) do { if (0) printk(x); } while (0) +#endif /* CONFIG_ARM_S3C24XX_CPUFREQ_IODEBUG */ + +static inline int s3c_cpufreq_addfreq(struct cpufreq_frequency_table *table, + int index, size_t table_size, + unsigned int freq) +{ + if (index < 0) + return index; + + if (table) { + if (index >= table_size) + return -ENOMEM; + + s3c_freq_dbg("%s: { %d = %u kHz }\n", + __func__, index, freq); + + table[index].driver_data = index; + table[index].frequency = freq; + } + + return index + 1; +} + +#endif diff --git a/include/linux/soc/samsung/s3c-pm.h b/include/linux/soc/samsung/s3c-pm.h index 730bd1d3d09a..f9164559c99f 100644 --- a/include/linux/soc/samsung/s3c-pm.h +++ b/include/linux/soc/samsung/s3c-pm.h @@ -81,4 +81,14 @@ extern void s3c_pm_check_store(void); #define s3c_pm_check_store() do { } while (0) #endif +/* system device subsystems */ + +extern struct bus_type s3c2410_subsys; +extern struct bus_type s3c2410a_subsys; +extern struct bus_type s3c2412_subsys; +extern struct bus_type s3c2416_subsys; +extern struct bus_type s3c2440_subsys; +extern struct bus_type s3c2442_subsys; +extern struct bus_type s3c2443_subsys; + #endif -- cgit v1.2.3 From 44c01f5ce1c7518886a87d5522528e30e0b4d9f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:53 +0200 Subject: cpufreq: s3c2412: use global s3c2412_cpufreq_setrefresh There are two identical copies of the s3c2412_cpufreq_setrefresh function: a static one in the cpufreq driver and a global version in iotiming-s3c2412.c. As the function requires the use of a hardcoded register address from a header that we want to not be visible to drivers, just move the existing global function and add a declaration in one of the cpufreq header files. Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20200806182059.2431-36-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/s3c-cpufreq-core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-cpufreq-core.h b/include/linux/soc/samsung/s3c-cpufreq-core.h index c578b07ccd5d..e0c7217a0f53 100644 --- a/include/linux/soc/samsung/s3c-cpufreq-core.h +++ b/include/linux/soc/samsung/s3c-cpufreq-core.h @@ -248,6 +248,7 @@ extern int s3c2412_iotiming_calc(struct s3c_cpufreq_config *cfg, extern void s3c2412_iotiming_set(struct s3c_cpufreq_config *cfg, struct s3c_iotimings *iot); +extern void s3c2412_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg); #else #define s3c2412_iotiming_debugfs NULL #define s3c2412_iotiming_calc NULL -- cgit v1.2.3 From c38758e3d574380ccfa583793be14c1cc8a322ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2020 20:20:54 +0200 Subject: cpufreq: s3c24xx: move low-level clk reg access into platform code Rather than have the cpufreq drivers touch include the common headers to get the constants, add a small indirection. This is still not the proper way that would do this through the common clk API, but it lets us kill off the header file usage. Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20200806182059.2431-37-krzk@kernel.org [krzk: Rebase and fix -Wold-style-definition] Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/s3c-cpufreq-core.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/s3c-cpufreq-core.h b/include/linux/soc/samsung/s3c-cpufreq-core.h index e0c7217a0f53..3b278afb769b 100644 --- a/include/linux/soc/samsung/s3c-cpufreq-core.h +++ b/include/linux/soc/samsung/s3c-cpufreq-core.h @@ -289,4 +289,11 @@ static inline int s3c_cpufreq_addfreq(struct cpufreq_frequency_table *table, return index + 1; } +u32 s3c2440_read_camdivn(void); +void s3c2440_write_camdivn(u32 camdiv); +u32 s3c24xx_read_clkdivn(void); +void s3c24xx_write_clkdivn(u32 clkdiv); +u32 s3c24xx_read_mpllcon(void); +void s3c24xx_write_locktime(u32 locktime); + #endif -- cgit v1.2.3 From 8d5930dfb7edbf136f2d9900be34ca7af4ba38c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jul 2020 20:07:10 -0400 Subject: skb_copy_and_csum_bits(): don't bother with the last argument it's always 0 Signed-off-by: Al Viro --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46881d902124..31a6d2fce071 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3535,7 +3535,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, - int len, __wsum csum); + int len); int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); -- cgit v1.2.3 From ba171d3f0850003216fd1a85190d17b1feddb961 Mon Sep 17 00:00:00 2001 From: Cedric Neveux Date: Mon, 4 Mar 2019 08:54:23 +0100 Subject: driver: tee: Handle NULL pointer indication from client TEE Client introduce a new capability "TEE_GEN_CAP_MEMREF_NULL" to handle the support of the shared memory buffer with a NULL pointer. This capability depends on TEE Capabilities and driver support. Driver and TEE exchange capabilities at driver initialization. Signed-off-by: Michael Whitfield Signed-off-by: Cedric Neveux Reviewed-by: Joakim Bech Tested-by: Joakim Bech (QEMU) Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index d074302989dd..cdd049a724b1 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -47,6 +47,8 @@ struct tee_shm_pool; * and just return with an error code. It is needed for requests * that arises from TEE based kernel drivers that should be * non-blocking in nature. + * @cap_memref_null: flag indicating if the TEE Client support shared + * memory buffer with a NULL pointer. */ struct tee_context { struct tee_device *teedev; @@ -54,6 +56,7 @@ struct tee_context { struct kref refcount; bool releasing; bool supp_nowait; + bool cap_memref_null; }; struct tee_param_memref { -- cgit v1.2.3 From 6b0a249a301e2af9adda84adbced3a2988248b95 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 11:44:18 -0700 Subject: bpf: Implement link_query for bpf iterators This patch implemented bpf_link callback functions show_fdinfo and fill_link_info to support link_query interface. The general interface for show_fdinfo and fill_link_info will print/fill the target_name. Each targets can register show_fdinfo and fill_link_info callbacks to print/fill more target specific information. For example, the below is a fdinfo result for a bpf task iterator. $ cat /proc/1749/fdinfo/7 pos: 0 flags: 02000000 mnt_id: 14 link_type: iter link_id: 11 prog_tag: 990e1f8152f7e54f prog_id: 59 target_name: task Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821184418.574122-1-yhs@fb.com --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9b7185a6b37..529e9b183eeb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1218,12 +1218,18 @@ typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, + struct seq_file *seq); +typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, + struct bpf_link_info *info); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; bpf_iter_attach_target_t attach_target; bpf_iter_detach_target_t detach_target; + bpf_iter_show_fdinfo_t show_fdinfo; + bpf_iter_fill_link_info_t fill_link_info; u32 ctx_arg_info_size; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; -- cgit v1.2.3 From b76f22269028fb252727a696084c70494d80a52c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 11:44:19 -0700 Subject: bpf: Implement link_query callbacks in map element iterators For bpf_map_elem and bpf_sk_local_storage bpf iterators, additional map_id should be shown for fdinfo and userspace query. For example, the following is for a bpf_map_elem iterator. $ cat /proc/1753/fdinfo/9 pos: 0 flags: 02000000 mnt_id: 14 link_type: iter link_id: 34 prog_tag: 104be6d3fe45e6aa prog_id: 173 target_name: bpf_map_elem map_id: 127 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821184419.574240-1-yhs@fb.com --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 529e9b183eeb..30c144af894a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1256,6 +1256,10 @@ int bpf_iter_new_fd(struct bpf_link *link); bool bpf_link_is_iter(struct bpf_link *link); struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop); int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); +void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux, + struct seq_file *seq); +int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, + struct bpf_link_info *info); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); -- cgit v1.2.3 From 7b219da43f94a3b4d5a8aa4cc52b75b34f0301ec Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 21 Aug 2020 11:29:43 +0100 Subject: net: sk_msg: Simplify sk_psock initialization Initializing psock->sk_proto and other saved callbacks is only done in sk_psock_update_proto, after sk_psock_init has returned. The logic for this is difficult to follow, and needlessly complex. Instead, initialize psock->sk_proto whenever we allocate a new psock. Additionally, assert the following invariants: * The SK has no ULP: ULP does it's own finagling of sk->sk_prot * sk_user_data is unused: we need it to store sk_psock Protect our access to sk_user_data with sk_callback_lock, which is what other users like reuseport arrays, etc. do. The result is that an sk_psock is always fully initialized, and that psock->sk_proto is always the "original" struct proto. The latter allows us to use psock->sk_proto when initializing IPv6 TCP / UDP callbacks for sockmap. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200821102948.21918-2-lmb@cloudflare.com --- include/linux/skmsg.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 1e9ed840b9fc..3119928fc103 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -340,23 +340,6 @@ static inline void sk_psock_update_proto(struct sock *sk, struct sk_psock *psock, struct proto *ops) { - /* Initialize saved callbacks and original proto only once, since this - * function may be called multiple times for a psock, e.g. when - * psock->progs.msg_parser is updated. - * - * Since we've not installed the new proto, psock is not yet in use and - * we can initialize it without synchronization. - */ - if (!psock->sk_proto) { - struct proto *orig = READ_ONCE(sk->sk_prot); - - psock->saved_unhash = orig->unhash; - psock->saved_close = orig->close; - psock->saved_write_space = sk->sk_write_space; - - psock->sk_proto = orig; - } - /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, ops); } -- cgit v1.2.3 From 13b79d3ffbb8add9e2a6d604db2b49f241b97303 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 21 Aug 2020 11:29:45 +0100 Subject: bpf: sockmap: Call sock_map_update_elem directly Don't go via map->ops to call sock_map_update_elem, since we know what function to call in bpf_map_update_value. Since we currently don't allow calling map_update_elem from BPF context, we can remove ops->map_update_elem and rename the function to sock_map_update_elem_sys. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200821102948.21918-4-lmb@cloudflare.com --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30c144af894a..81f38e2fda78 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1648,6 +1648,7 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else @@ -1669,6 +1670,12 @@ static inline int sock_map_prog_detach(const union bpf_attr *attr, { return -EOPNOTSUPP; } + +static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, + u64 flags) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -- cgit v1.2.3 From 781cb90b0529b5bb84c63691fe42a7c26d197aec Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:30:40 -0700 Subject: platform_data: ad7793.h: drop a duplicated word Drop the repeated word "and" in a comment. Signed-off-by: Randy Dunlap Cc: Lars-Peter Clausen Cc: Jonathan Cameron Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7793.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7793.h b/include/linux/platform_data/ad7793.h index 576c7f962c4e..7c697e58f02a 100644 --- a/include/linux/platform_data/ad7793.h +++ b/include/linux/platform_data/ad7793.h @@ -40,7 +40,7 @@ enum ad7793_bias_voltage { * enum ad7793_refsel - AD7793 reference voltage selection * @AD7793_REFSEL_REFIN1: External reference applied between REFIN1(+) * and REFIN1(-). - * @AD7793_REFSEL_REFIN2: External reference applied between REFIN2(+) and + * @AD7793_REFSEL_REFIN2: External reference applied between REFIN2(+) * and REFIN1(-). Only valid for AD7795/AD7796. * @AD7793_REFSEL_INTERNAL: Internal 1.17 V reference. */ -- cgit v1.2.3 From e2d977c9f1abd1d199b412f8f83c1727808b794d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 14 Aug 2020 12:19:35 +0200 Subject: timekeeping: Provide multi-timestamp accessor to NMI safe timekeeper printk wants to store various timestamps (MONOTONIC, REALTIME, BOOTTIME) to make correlation of dmesg from several systems easier. Provide an interface to retrieve all three timestamps in one go. There are some caveats: 1) Boot time and late sleep time injection Boot time is a racy access on 32bit systems if the sleep time injection happens late during resume and not in timekeeping_resume(). That could be avoided by expanding struct tk_read_base with boot offset for 32bit and adding more overhead to the update. As this is a hard to observe once per resume event which can be filtered with reasonable effort using the accurate mono/real timestamps, it's probably not worth the trouble. Aside of that it might be possible on 32 and 64 bit to observe the following when the sleep time injection happens late: CPU 0 CPU 1 timekeeping_resume() ktime_get_fast_timestamps() mono, real = __ktime_get_real_fast() inject_sleep_time() update boot offset boot = mono + bootoffset; That means that boot time already has the sleep time adjustment, but real time does not. On the next readout both are in sync again. Preventing this for 64bit is not really feasible without destroying the careful cache layout of the timekeeper because the sequence count and struct tk_read_base would then need two cache lines instead of one. 2) Suspend/resume timestamps Access to the time keeper clock source is disabled accross the innermost steps of suspend/resume. The accessors still work, but the timestamps are frozen until time keeping is resumed which happens very early. For regular suspend/resume there is no observable difference vs. sched clock, but it might affect some of the nasty low level debug printks. OTOH, access to sched clock is not guaranteed accross suspend/resume on all systems either so it depends on the hardware in use. If that turns out to be a real problem then this could be mitigated by using sched clock in a similar way as during early boot. But it's not as trivial as on early boot because it needs some careful protection against the clock monotonic timestamp jumping backwards on resume. Signed-off-by: Thomas Gleixner Tested-by: Petr Mladek Link: https://lore.kernel.org/r/20200814115512.159981360@linutronix.de --- include/linux/timekeeping.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index d5471d6fa778..7f7e4a3f4394 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -222,6 +222,18 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); +/* + * struct ktime_timestanps - Simultaneous mono/boot/real timestamps + * @mono: Monotonic timestamp + * @boot: Boottime timestamp + * @real: Realtime timestamp + */ +struct ktime_timestamps { + u64 mono; + u64 boot; + u64 real; +}; + /** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value @@ -280,6 +292,9 @@ extern int get_device_system_crosststamp( */ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); +/* NMI safe mono/boot/realtime timestamps */ +extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap); + /* * Persistent clock related interfaces */ -- cgit v1.2.3 From 2152fbbd47c06c4f50ad265ec1b0c43673bee3e8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 3 Jul 2020 09:07:29 -0700 Subject: soc: ti: pm33xx: Simplify RTC usage to prepare to drop platform data We must re-enable the RTC module clock enabled in RTC+DDR suspend, and pm33xx has been using platform data callbacks for that. Looks like for retention suspend the RTC module clock must not be re-enabled. To remove the legacy platform data callbacks, and eventually be able to drop the RTC legacy platform data, let's manage the RTC module clock and register range directly in pm33xx. Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren --- include/linux/platform_data/pm33xx.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h index 644af1d89cfa..7037ba7a53ca 100644 --- a/include/linux/platform_data/pm33xx.h +++ b/include/linux/platform_data/pm33xx.h @@ -54,11 +54,8 @@ struct am33xx_pm_platform_data { void (*begin_suspend)(void); void (*finish_suspend)(void); struct am33xx_pm_sram_addr *(*get_sram_addrs)(void); - void __iomem *(*get_rtc_base_addr)(void); void (*save_context)(void); void (*restore_context)(void); - void (*prepare_rtc_suspend)(void); - void (*prepare_rtc_resume)(void); int (*check_off_mode_enable)(void); }; -- cgit v1.2.3 From 70a217f1976f75a6cfe8223e5669ad7b405daaad Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:14 -0700 Subject: tcp: Use a struct to represent a saved_syn The TCP_SAVE_SYN has both the network header and tcp header. The total length of the saved syn packet is currently stored in the first 4 bytes (u32) of an array and the actual packet data is stored after that. A later patch will add a bpf helper that allows to get the tcp header alone from the saved syn without the network header. It will be more convenient to have a direct offset to a specific header instead of re-parsing it. This requires to separately store the network hdrlen. The total header length (i.e. network + tcp) is still needed for the current usage in getsockopt. Although this total length can be obtained by looking into the tcphdr and then get the (th->doff << 2), this patch chooses to directly store the tcp hdrlen in the second four bytes of this newly created "struct saved_syn". By using a new struct, it can give a readable name to each individual header length. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190014.2883694-1-kafai@fb.com --- include/linux/tcp.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 14b62d7df942..2088d5a079af 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -406,7 +406,7 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. */ struct request_sock __rcu *fastopen_rsk; - u32 *saved_syn; + struct saved_syn *saved_syn; }; enum tsq_enum { @@ -484,6 +484,11 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) tp->saved_syn = NULL; } +static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) +{ + return saved_syn->network_hdrlen + saved_syn->tcp_hdrlen; +} + struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, const struct sk_buff *orig_skb); -- cgit v1.2.3 From 7656d68455891f7fc6689f95415fd59e7a1d629b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:33 -0700 Subject: tcp: Add saw_unknown to struct tcp_options_received In a later patch, the bpf prog only wants to be called to handle a header option if that particular header option cannot be handled by the kernel. This unknown option could be written by the peer's bpf-prog. It could also be a new standard option that the running kernel does not support it while a bpf-prog can handle it. This patch adds a "saw_unknown" bit to "struct tcp_options_received" and it uses an existing one byte hole to do that. "saw_unknown" will be set in tcp_parse_options() if it sees an option that the kernel cannot handle. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190033.2884430-1-kafai@fb.com --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2088d5a079af..29d166263ae7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -92,6 +92,8 @@ struct tcp_options_received { smc_ok : 1, /* SMC seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ + u8 saw_unknown:1, /* Received unknown option */ + unused:7; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ -- cgit v1.2.3 From c9985d09e18965131958102f4b67fa1e742df335 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:58 -0700 Subject: bpf: sock_ops: Change some members of sock_ops_kern from u32 to u8 A later patch needs to add a few pointers and a few u8 to sock_ops_kern. Hence, this patch saves some spaces by moving some of the existing members from u32 to u8 so that the later patch can still fit everything in a cacheline. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190058.2885640-1-kafai@fb.com --- include/linux/filter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0a355b005bf4..c427dfa5f908 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1236,13 +1236,13 @@ struct bpf_sock_addr_kern { struct bpf_sock_ops_kern { struct sock *sk; - u32 op; union { u32 args[4]; u32 reply; u32 replylong[4]; }; - u32 is_fullsock; + u8 op; + u8 is_fullsock; u64 temp; /* temp and everything after is not * initialized to 0 before calling * the BPF program. New fields that -- cgit v1.2.3 From 0813a841566f0962a5551be7749b43c45f0022a0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:04 -0700 Subject: bpf: tcp: Allow bpf prog to write and parse TCP header option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Note: The TCP changes here is mainly to implement the bpf pieces into the bpf_skops_*() functions introduced in the earlier patches. ] The earlier effort in BPF-TCP-CC allows the TCP Congestion Control algorithm to be written in BPF. It opens up opportunities to allow a faster turnaround time in testing/releasing new congestion control ideas to production environment. The same flexibility can be extended to writing TCP header option. It is not uncommon that people want to test new TCP header option to improve the TCP performance. Another use case is for data-center that has a more controlled environment and has more flexibility in putting header options for internal only use. For example, we want to test the idea in putting maximum delay ACK in TCP header option which is similar to a draft RFC proposal [1]. This patch introduces the necessary BPF API and use them in the TCP stack to allow BPF_PROG_TYPE_SOCK_OPS program to parse and write TCP header options. It currently supports most of the TCP packet except RST. Supported TCP header option: ─────────────────────────── This patch allows the bpf-prog to write any option kind. Different bpf-progs can write its own option by calling the new helper bpf_store_hdr_opt(). The helper will ensure there is no duplicated option in the header. By allowing bpf-prog to write any option kind, this gives a lot of flexibility to the bpf-prog. Different bpf-prog can write its own option kind. It could also allow the bpf-prog to support a recently standardized option on an older kernel. Sockops Callback Flags: ────────────────────── The bpf program will only be called to parse/write tcp header option if the following newly added callback flags are enabled in tp->bpf_sock_ops_cb_flags: BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG A few words on the PARSE CB flags. When the above PARSE CB flags are turned on, the bpf-prog will be called on packets received at a sk that has at least reached the ESTABLISHED state. The parsing of the SYN-SYNACK-ACK will be discussed in the "3 Way HandShake" section. The default is off for all of the above new CB flags, i.e. the bpf prog will not be called to parse or write bpf hdr option. There are details comment on these new cb flags in the UAPI bpf.h. sock_ops->skb_data and bpf_load_hdr_opt() ───────────────────────────────────────── sock_ops->skb_data and sock_ops->skb_data_end covers the whole TCP header and its options. They are read only. The new bpf_load_hdr_opt() helps to read a particular option "kind" from the skb_data. Please refer to the comment in UAPI bpf.h. It has details on what skb_data contains under different sock_ops->op. 3 Way HandShake ─────────────── The bpf-prog can learn if it is sending SYN or SYNACK by reading the sock_ops->skb_tcp_flags. * Passive side When writing SYNACK (i.e. sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB), the received SYN skb will be available to the bpf prog. The bpf prog can use the SYN skb (which may carry the header option sent from the remote bpf prog) to decide what bpf header option should be written to the outgoing SYNACK skb. The SYN packet can be obtained by getsockopt(TCP_BPF_SYN*). More on this later. Also, the bpf prog can learn if it is in syncookie mode (by checking sock_ops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE). The bpf prog can store the received SYN pkt by using the existing bpf_setsockopt(TCP_SAVE_SYN). The example in a later patch does it. [ Note that the fullsock here is a listen sk, bpf_sk_storage is not very useful here since the listen sk will be shared by many concurrent connection requests. Extending bpf_sk_storage support to request_sock will add weight to the minisock and it is not necessary better than storing the whole ~100 bytes SYN pkt. ] When the connection is established, the bpf prog will be called in the existing PASSIVE_ESTABLISHED_CB callback. At that time, the bpf prog can get the header option from the saved syn and then apply the needed operation to the newly established socket. The later patch will use the max delay ack specified in the SYN header and set the RTO of this newly established connection as an example. The received ACK (that concludes the 3WHS) will also be available to the bpf prog during PASSIVE_ESTABLISHED_CB through the sock_ops->skb_data. It could be useful in syncookie scenario. More on this later. There is an existing getsockopt "TCP_SAVED_SYN" to return the whole saved syn pkt which includes the IP[46] header and the TCP header. A few "TCP_BPF_SYN*" getsockopt has been added to allow specifying where to start getting from, e.g. starting from TCP header, or from IP[46] header. The new getsockopt(TCP_BPF_SYN*) will also know where it can get the SYN's packet from: - (a) the just received syn (available when the bpf prog is writing SYNACK) and it is the only way to get SYN during syncookie mode. or - (b) the saved syn (available in PASSIVE_ESTABLISHED_CB and also other existing CB). The bpf prog does not need to know where the SYN pkt is coming from. The getsockopt(TCP_BPF_SYN*) will hide this details. Similarly, a flags "BPF_LOAD_HDR_OPT_TCP_SYN" is also added to bpf_load_hdr_opt() to read a particular header option from the SYN packet. * Fastopen Fastopen should work the same as the regular non fastopen case. This is a test in a later patch. * Syncookie For syncookie, the later example patch asks the active side's bpf prog to resend the header options in ACK. The server can use bpf_load_hdr_opt() to look at the options in this received ACK during PASSIVE_ESTABLISHED_CB. * Active side The bpf prog will get a chance to write the bpf header option in the SYN packet during WRITE_HDR_OPT_CB. The received SYNACK pkt will also be available to the bpf prog during the existing ACTIVE_ESTABLISHED_CB callback through the sock_ops->skb_data and bpf_load_hdr_opt(). * Turn off header CB flags after 3WHS If the bpf prog does not need to write/parse header options beyond the 3WHS, the bpf prog can clear the bpf_sock_ops_cb_flags to avoid being called for header options. Or the bpf-prog can select to leave the UNKNOWN_HDR_OPT_CB_FLAG on so that the kernel will only call it when there is option that the kernel cannot handle. [1]: draft-wang-tcpm-low-latency-opt-00 https://tools.ietf.org/html/draft-wang-tcpm-low-latency-opt-00 Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190104.2885895-1-kafai@fb.com --- include/linux/bpf-cgroup.h | 25 +++++++++++++++++++++++++ include/linux/filter.h | 4 ++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 64f367044e25..2f98d2fce62e 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -279,6 +279,31 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL) +/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a + * fullsock and its parent fullsock cannot be traced by + * sk_to_full_sk(). + * + * e.g. sock_ops->sk is a request_sock and it is under syncookie mode. + * Its listener-sk is not attached to the rsk_listener. + * In this case, the caller holds the listener-sk (unlocked), + * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with + * the listener-sk such that the cgroup-bpf-progs of the + * listener-sk will be run. + * + * Regardless of syncookie mode or not, + * calling bpf_setsockopt on listener-sk will not make sense anyway, + * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here. + */ +#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_sock_ops(sk, \ + sock_ops, \ + BPF_CGROUP_SOCK_OPS); \ + __ret; \ +}) + #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ diff --git a/include/linux/filter.h b/include/linux/filter.h index c427dfa5f908..995625950cc1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1241,8 +1241,12 @@ struct bpf_sock_ops_kern { u32 reply; u32 replylong[4]; }; + struct sk_buff *syn_skb; + struct sk_buff *skb; + void *skb_data_end; u8 op; u8 is_fullsock; + u8 remaining_opt_len; u64 temp; /* temp and everything after is not * initialized to 0 before calling * the BPF program. New fields that -- cgit v1.2.3 From 267cf9fa43d1c9d525d5d818a8651f2900e3aa9e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:23 -0700 Subject: tcp: bpf: Optionally store mac header in TCP_SAVE_SYN This patch is adapted from Eric's patch in an earlier discussion [1]. The TCP_SAVE_SYN currently only stores the network header and tcp header. This patch allows it to optionally store the mac header also if the setsockopt's optval is 2. It requires one more bit for the "save_syn" bit field in tcp_sock. This patch achieves this by moving the syn_smc bit next to the is_mptcp. The syn_smc is currently used with the TCP experimental option. Since syn_smc is only used when CONFIG_SMC is enabled, this patch also puts the "IS_ENABLED(CONFIG_SMC)" around it like the is_mptcp did with "IS_ENABLED(CONFIG_MPTCP)". The mac_hdrlen is also stored in the "struct saved_syn" to allow a quick offset from the bpf prog if it chooses to start getting from the network header or the tcp header. [1]: https://lore.kernel.org/netdev/CANn89iLJNWh6bkH7DNhy_kmcAexuUCccqERqe7z2QsvPhGrYPQ@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190123.2886935-1-kafai@fb.com --- include/linux/tcp.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 29d166263ae7..56ff2952edaf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -239,14 +239,13 @@ struct tcp_sock { repair : 1, frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; - u8 syn_data:1, /* SYN includes data */ + u8 save_syn:2, /* Save headers of SYN packet */ + syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ - save_syn:1, /* Save headers of SYN packet */ - is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ - syn_smc:1; /* SYN includes SMC */ + is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ @@ -393,6 +392,9 @@ struct tcp_sock { #if IS_ENABLED(CONFIG_MPTCP) bool is_mptcp; #endif +#if IS_ENABLED(CONFIG_SMC) + bool syn_smc; /* SYN includes SMC */ +#endif #ifdef CONFIG_TCP_MD5SIG /* TCP AF-Specific parts; only used by MD5 Signature support so far */ @@ -488,7 +490,8 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) { - return saved_syn->network_hdrlen + saved_syn->tcp_hdrlen; + return saved_syn->mac_hdrlen + saved_syn->network_hdrlen + + saved_syn->tcp_hdrlen; } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, -- cgit v1.2.3 From 14e2ac8de0f91f12122a49f09897b0cd05256460 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Jul 2020 09:00:01 +0200 Subject: kcsan: Support compounded read-write instrumentation Add support for compounded read-write instrumentation if supported by the compiler. Adds the necessary instrumentation functions, and a new type which is used to generate a more descriptive report. Furthermore, such compounded memory access instrumentation is excluded from the "assume aligned writes up to word size are atomic" rule, because we cannot assume that the compiler emits code that is atomic for compound ops. LLVM/Clang added support for the feature in: https://github.com/llvm/llvm-project/commit/785d41a261d136b64ab6c15c5d35f2adc5ad53e3 The new instrumentation is emitted for sets of memory accesses in the same basic block to the same address with at least one read appearing before a write. These typically result from compound operations such as ++, --, +=, -=, |=, &=, etc. but also equivalent forms such as "var = var + 1". Where the compiler determines that it is equivalent to emit a call to a single __tsan_read_write instead of separate __tsan_read and __tsan_write, we can then benefit from improved performance and better reporting for such access patterns. The new reports now show that the ops are both reads and writes, for example: read-write to 0xffffffff90548a38 of 8 bytes by task 143 on cpu 3: test_kernel_rmw_array+0x45/0xa0 access_thread+0x71/0xb0 kthread+0x21e/0x240 ret_from_fork+0x22/0x30 read-write to 0xffffffff90548a38 of 8 bytes by task 144 on cpu 2: test_kernel_rmw_array+0x45/0xa0 access_thread+0x71/0xb0 kthread+0x21e/0x240 ret_from_fork+0x22/0x30 Acked-by: Peter Zijlstra (Intel) Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 45 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index c5f6c1dcf7e3..cf14840609ce 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -7,19 +7,13 @@ #include #include -/* - * ACCESS TYPE MODIFIERS - * - * : normal read access; - * WRITE : write access; - * ATOMIC: access is atomic; - * ASSERT: access is not a regular access, but an assertion; - * SCOPED: access is a scoped access; - */ -#define KCSAN_ACCESS_WRITE 0x1 -#define KCSAN_ACCESS_ATOMIC 0x2 -#define KCSAN_ACCESS_ASSERT 0x4 -#define KCSAN_ACCESS_SCOPED 0x8 +/* Access types -- if KCSAN_ACCESS_WRITE is not set, the access is a read. */ +#define KCSAN_ACCESS_WRITE (1 << 0) /* Access is a write. */ +#define KCSAN_ACCESS_COMPOUND (1 << 1) /* Compounded read-write instrumentation. */ +#define KCSAN_ACCESS_ATOMIC (1 << 2) /* Access is atomic. */ +/* The following are special, and never due to compiler instrumentation. */ +#define KCSAN_ACCESS_ASSERT (1 << 3) /* Access is an assertion. */ +#define KCSAN_ACCESS_SCOPED (1 << 4) /* Access is a scoped access. */ /* * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used @@ -204,6 +198,15 @@ static inline void __kcsan_disable_current(void) { } #define __kcsan_check_write(ptr, size) \ __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) +/** + * __kcsan_check_read_write - check regular read-write access for races + * + * @ptr: address of access + * @size: size of access + */ +#define __kcsan_check_read_write(ptr, size) \ + __kcsan_check_access(ptr, size, KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE) + /** * kcsan_check_read - check regular read access for races * @@ -221,18 +224,30 @@ static inline void __kcsan_disable_current(void) { } #define kcsan_check_write(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) +/** + * kcsan_check_read_write - check regular read-write access for races + * + * @ptr: address of access + * @size: size of access + */ +#define kcsan_check_read_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE) + /* * Check for atomic accesses: if atomic accesses are not ignored, this simply * aliases to kcsan_check_access(), otherwise becomes a no-op. */ #ifdef CONFIG_KCSAN_IGNORE_ATOMICS -#define kcsan_check_atomic_read(...) do { } while (0) -#define kcsan_check_atomic_write(...) do { } while (0) +#define kcsan_check_atomic_read(...) do { } while (0) +#define kcsan_check_atomic_write(...) do { } while (0) +#define kcsan_check_atomic_read_write(...) do { } while (0) #else #define kcsan_check_atomic_read(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC) #define kcsan_check_atomic_write(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE) +#define kcsan_check_atomic_read_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_COMPOUND) #endif /** -- cgit v1.2.3 From 00047c2e6d7c576c1a847f7db07ef0fc58085f22 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Jul 2020 09:00:06 +0200 Subject: instrumented.h: Introduce read-write instrumentation hooks Introduce read-write instrumentation hooks, to more precisely denote an operation's behaviour. KCSAN is able to distinguish compound instrumentation, and with the new instrumentation we then benefit from improved reporting. More importantly, read-write compound operations should not implicitly be treated as atomic, if they aren't actually atomic. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/instrumented.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h index 43e6ea591975..42faebbaa202 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -42,6 +42,21 @@ static __always_inline void instrument_write(const volatile void *v, size_t size kcsan_check_write(v, size); } +/** + * instrument_read_write - instrument regular read-write access + * + * Instrument a regular write access. The instrumentation should be inserted + * before the actual write happens. + * + * @ptr address of access + * @size size of access + */ +static __always_inline void instrument_read_write(const volatile void *v, size_t size) +{ + kasan_check_write(v, size); + kcsan_check_read_write(v, size); +} + /** * instrument_atomic_read - instrument atomic read access * @@ -72,6 +87,21 @@ static __always_inline void instrument_atomic_write(const volatile void *v, size kcsan_check_atomic_write(v, size); } +/** + * instrument_atomic_read_write - instrument atomic read-write access + * + * Instrument an atomic read-write access. The instrumentation should be + * inserted before the actual write happens. + * + * @ptr address of access + * @size size of access + */ +static __always_inline void instrument_atomic_read_write(const volatile void *v, size_t size) +{ + kasan_check_write(v, size); + kcsan_check_atomic_read_write(v, size); +} + /** * instrument_copy_to_user - instrument reads of copy_to_user * -- cgit v1.2.3 From 583bbf0624dfd8fc45f1049be1d4980be59451ff Mon Sep 17 00:00:00 2001 From: Luke Hsiao Date: Fri, 21 Aug 2020 21:41:04 -0700 Subject: io_uring: allow tcp ancillary data for __sys_recvmsg_sock() For TCP tx zero-copy, the kernel notifies the process of completions by queuing completion notifications on the socket error queue. This patch allows reading these notifications via recvmsg to support TCP tx zero-copy. Ancillary data was originally disallowed due to privilege escalation via io_uring's offloading of sendmsg() onto a kernel thread with kernel credentials (https://crbug.com/project-zero/1975). So, we must ensure that the socket type is one where the ancillary data types that are delivered on recvmsg are plain data (no file descriptors or values that are translated based on the identity of the calling process). This was tested by using io_uring to call recvmsg on the MSG_ERRQUEUE with tx zero-copy enabled. Before this patch, we received -EINVALID from this specific code path. After this patch, we could read tcp tx zero-copy completion notifications from the MSG_ERRQUEUE. Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Arjun Roy Acked-by: Eric Dumazet Reviewed-by: Jann Horn Reviewed-by: Jens Axboe Signed-off-by: Luke Hsiao Signed-off-by: David S. Miller --- include/linux/net.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index d48ff1180879..7657c6432a69 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -41,6 +41,8 @@ struct net; #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 +#define PROTO_CMSG_DATA_ONLY 0x0001 + #ifndef ARCH_HAS_SOCKET_TYPES /** * enum sock_type - Socket types @@ -135,6 +137,7 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, struct proto_ops { int family; + unsigned int flags; struct module *owner; int (*release) (struct socket *sock); int (*bind) (struct socket *sock, -- cgit v1.2.3 From 755f982bb1ff469a181df3eaf8dd5d769267ab8e Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Sun, 23 Aug 2020 14:19:26 +0300 Subject: qed/qede: make devlink survive recovery Devlink instance lifecycle was linked to qed_dev object, that caused devlink to be recreated on each recovery. Changing it by making higher level driver (qede) responsible for its life. This way devlink now survives recoveries. qede now stores devlink structure pointer as a part of its device object, devlink private data contains a linkage structure, qed_devlink. Signed-off-by: Igor Russkikh Signed-off-by: Alexander Lobakin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index cd6a5c7e56eb..d8368e1770df 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -21,6 +21,7 @@ #include #include #include +#include enum dcbx_protocol_type { DCBX_PROTOCOL_ISCSI, @@ -779,6 +780,10 @@ enum qed_nvm_flash_cmd { QED_NVM_FLASH_CMD_NVM_MAX, }; +struct qed_devlink { + struct qed_dev *cdev; +}; + struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, struct qed_link_output *link); @@ -1137,6 +1142,10 @@ struct qed_common_ops { * */ int (*set_grc_config)(struct qed_dev *cdev, u32 cfg_id, u32 val); + + struct devlink* (*devlink_register)(struct qed_dev *cdev); + + void (*devlink_unregister)(struct devlink *devlink); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From 9524067b9a91dce2a096a0de7727c217495e3d2e Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Sun, 23 Aug 2020 14:19:29 +0300 Subject: qed: health reporter init deinit seq Here we declare health reporter ops (empty for now) and register these in qed probe and remove callbacks. This way we get devlink attached to all kind of qed* PCI device entities: networking or storage offload entity. Signed-off-by: Igor Russkikh Signed-off-by: Alexander Lobakin Signed-off-by: Michal Kalderon Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d8368e1770df..30fe06fe06a0 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -782,6 +782,7 @@ enum qed_nvm_flash_cmd { struct qed_devlink { struct qed_dev *cdev; + struct devlink_health_reporter *fw_reporter; }; struct qed_common_cb_ops { -- cgit v1.2.3 From 4f5a8db27eb926616500f827d9b81dc40595b0ef Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Sun, 23 Aug 2020 14:19:30 +0300 Subject: qed: use devlink logic to report errors Use devlink_health_report to push error indications. We implement this in qede via callback function to make it possible to reuse the same for other drivers sitting on top of qed in future. Signed-off-by: Igor Russkikh Signed-off-by: Alexander Lobakin Signed-off-by: Michal Kalderon Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 30fe06fe06a0..a75533de9186 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -906,6 +906,9 @@ struct qed_common_ops { int (*dbg_all_data_size) (struct qed_dev *cdev); + int (*report_fatal_error)(struct devlink *devlink, + enum qed_hw_err_type err_type); + /** * @brief can_link_change - can the instance change the link or not * -- cgit v1.2.3 From c5c642c55e2fd43fcf42262fd1e87271d413fb42 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Sun, 23 Aug 2020 14:19:33 +0300 Subject: qed: align adjacent indent Remove extra indent on some of adjacent declarations. Signed-off-by: Igor Russkikh Signed-off-by: Alexander Lobakin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 69 ++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index a75533de9186..56fa55841d39 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -850,10 +850,9 @@ struct qed_common_ops { struct qed_dev* (*probe)(struct pci_dev *dev, struct qed_probe_params *params); - void (*remove)(struct qed_dev *cdev); + void (*remove)(struct qed_dev *cdev); - int (*set_power_state)(struct qed_dev *cdev, - pci_power_t state); + int (*set_power_state)(struct qed_dev *cdev, pci_power_t state); void (*set_name) (struct qed_dev *cdev, char name[]); @@ -861,50 +860,48 @@ struct qed_common_ops { * PF params required for the call before slowpath_start is * documented within the qed_pf_params structure definition. */ - void (*update_pf_params)(struct qed_dev *cdev, - struct qed_pf_params *params); - int (*slowpath_start)(struct qed_dev *cdev, - struct qed_slowpath_params *params); + void (*update_pf_params)(struct qed_dev *cdev, + struct qed_pf_params *params); - int (*slowpath_stop)(struct qed_dev *cdev); + int (*slowpath_start)(struct qed_dev *cdev, + struct qed_slowpath_params *params); + + int (*slowpath_stop)(struct qed_dev *cdev); /* Requests to use `cnt' interrupts for fastpath. * upon success, returns number of interrupts allocated for fastpath. */ - int (*set_fp_int)(struct qed_dev *cdev, - u16 cnt); + int (*set_fp_int)(struct qed_dev *cdev, u16 cnt); /* Fills `info' with pointers required for utilizing interrupts */ - int (*get_fp_int)(struct qed_dev *cdev, - struct qed_int_info *info); - - u32 (*sb_init)(struct qed_dev *cdev, - struct qed_sb_info *sb_info, - void *sb_virt_addr, - dma_addr_t sb_phy_addr, - u16 sb_id, - enum qed_sb_type type); - - u32 (*sb_release)(struct qed_dev *cdev, - struct qed_sb_info *sb_info, - u16 sb_id, - enum qed_sb_type type); - - void (*simd_handler_config)(struct qed_dev *cdev, - void *token, - int index, - void (*handler)(void *)); - - void (*simd_handler_clean)(struct qed_dev *cdev, - int index); - int (*dbg_grc)(struct qed_dev *cdev, - void *buffer, u32 *num_dumped_bytes); + int (*get_fp_int)(struct qed_dev *cdev, struct qed_int_info *info); + + u32 (*sb_init)(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + void *sb_virt_addr, + dma_addr_t sb_phy_addr, + u16 sb_id, + enum qed_sb_type type); + + u32 (*sb_release)(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + u16 sb_id, + enum qed_sb_type type); + + void (*simd_handler_config)(struct qed_dev *cdev, + void *token, + int index, + void (*handler)(void *)); + + void (*simd_handler_clean)(struct qed_dev *cdev, int index); + + int (*dbg_grc)(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); int (*dbg_grc_size)(struct qed_dev *cdev); - int (*dbg_all_data) (struct qed_dev *cdev, void *buffer); + int (*dbg_all_data)(struct qed_dev *cdev, void *buffer); - int (*dbg_all_data_size) (struct qed_dev *cdev); + int (*dbg_all_data_size)(struct qed_dev *cdev); int (*report_fatal_error)(struct devlink *devlink, enum qed_hw_err_type err_type); -- cgit v1.2.3 From 000601bb62330f18dc8f5d2d0b82e9aec3e207c4 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 9 Jul 2020 15:05:59 +0200 Subject: rcu: Fix kerneldoc comments in rcupdate.h This commit fixes the kerneldoc comments for rcu_read_unlock_bh(), rcu_read_unlock_sched() and rcu_head_after_call_rcu() so they e.g. get properly linked in the API documentation. Also add parenthesis after function names to match the notation used in other kerneldoc comments in the same file. Signed-off-by: Tobias Klauser Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d15d46db61f7..b47d6b66665e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -709,8 +709,8 @@ static inline void rcu_read_lock_bh(void) "rcu_read_lock_bh() used illegally while idle"); } -/* - * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section +/** + * rcu_read_unlock_bh() - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ @@ -751,10 +751,10 @@ static inline notrace void rcu_read_lock_sched_notrace(void) __acquire(RCU_SCHED); } -/* - * rcu_read_unlock_sched - marks the end of a RCU-classic critical section +/** + * rcu_read_unlock_sched() - marks the end of a RCU-classic critical section * - * See rcu_read_lock_sched for more information. + * See rcu_read_lock_sched() for more information. */ static inline void rcu_read_unlock_sched(void) { @@ -945,7 +945,7 @@ static inline void rcu_head_init(struct rcu_head *rhp) } /** - * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()? + * rcu_head_after_call_rcu() - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. * @f: The function passed to call_rcu() along with @rhp. * -- cgit v1.2.3 From ae2212a7216b674633bdc3bd2e24947a0665efb8 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Sun, 12 Jul 2020 18:40:02 +0530 Subject: rculist: Introduce list/hlist_for_each_entry_srcu() macros list/hlist_for_each_entry_rcu() provides an optional cond argument to specify the lock held in the updater side. However for SRCU read side, not providing the cond argument results into false positive as whether srcu_read_lock is held or not is not checked implicitly. Therefore, on read side the lockdep expression srcu_read_lock_held(srcu struct) can solve this issue. However, the function still fails to check the cases where srcu protected list is traversed with rcu_read_lock() instead of srcu_read_lock(). Therefore, to remove the false negative, this patch introduces two new list traversal primitives : list_for_each_entry_srcu() and hlist_for_each_entry_srcu(). Both of the functions have non-optional cond argument as it is required for both read and update side, and simply checks if the cond is true. For regular read side the lockdep expression srcu_read_lock_head() can be passed as the cond argument to list/hlist_for_each_entry_srcu(). Suggested-by: Paolo Bonzini Tested-by: Suraj Upadhyay Tested-by: Naresh Kamboju [ paulmck: Add "true" per kbuild test robot feedback. ] Signed-off-by: Madhuparna Bhowmik Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 7a6fc9956510..f8633d37e358 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -63,9 +63,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) RCU_LOCKDEP_WARN(!(cond) && !rcu_read_lock_any_held(), \ "RCU-list traversed in non-reader section!"); \ }) + +#define __list_check_srcu(cond) \ + ({ \ + RCU_LOCKDEP_WARN(!(cond), \ + "RCU-list traversed without holding the required lock!");\ + }) #else #define __list_check_rcu(dummy, cond, extra...) \ ({ check_arg_count_one(extra); }) + +#define __list_check_srcu(cond) ({ }) #endif /* @@ -385,6 +393,25 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) +/** + * list_for_each_entry_srcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * @cond: lockdep expression for the lock required to traverse the list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by srcu_read_lock(). + * The lockdep expression srcu_read_lock_held() can be passed as the + * cond argument from read side. + */ +#define list_for_each_entry_srcu(pos, head, member, cond) \ + for (__list_check_srcu(cond), \ + pos = list_entry_rcu((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) + /** * list_entry_lockless - get the struct for this entry * @ptr: the &struct list_head pointer. @@ -683,6 +710,27 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) +/** + * hlist_for_each_entry_srcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * @cond: lockdep expression for the lock required to traverse the list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by srcu_read_lock(). + * The lockdep expression srcu_read_lock_held() can be passed as the + * cond argument from read side. + */ +#define hlist_for_each_entry_srcu(pos, head, member, cond) \ + for (__list_check_srcu(cond), \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) + /** * hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing) * @pos: the type * to use as a loop cursor. -- cgit v1.2.3 From 7f2a53c231fe5d9522c3b695ab454203904031ac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Aug 2020 10:37:22 -0700 Subject: rcu: Remove unused __rcu_is_watching() function The x86/entry work removed all uses of __rcu_is_watching(), therefore this commit removes it entirely. Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 1 - include/linux/rcutree.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5cc9637cac16..7c1ecdb356d8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -103,7 +103,6 @@ static inline void rcu_scheduler_starting(void) { } static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } -static inline bool __rcu_is_watching(void) { return true; } static inline void rcu_momentary_dyntick_idle(void) { } static inline void kfree_rcu_scheduler_running(void) { } static inline bool rcu_gp_might_be_stalled(void) { return false; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2f4064ebd1d..59eb5cd567d7 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -64,7 +64,6 @@ extern int rcu_scheduler_active __read_mostly; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); -bool __rcu_is_watching(void); #ifndef CONFIG_PREEMPTION void rcu_all_qs(void); #endif -- cgit v1.2.3 From aa40c138cc8f36e2f5c721fd1bdb823a1ef1a237 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Aug 2020 09:58:03 -0700 Subject: rcu: Report QS for outermost PREEMPT=n rcu_read_unlock() for strict GPs The CONFIG_PREEMPT=n instance of rcu_read_unlock is even more aggressively than that of CONFIG_PREEMPT=y in deferring reporting quiescent states to the RCU core. This is just what is wanted in normal use because it reduces overhead, but the resulting delay is not what is wanted for kernels built with CONFIG_RCU_STRICT_GRACE_PERIOD=y. This commit therefore adds an rcu_read_unlock_strict() function that checks for exceptional conditions, and reports the newly started quiescent state if it is safe to do so, also doing a spin-delay if requested via rcutree.rcu_unlock_delay. This commit also adds a call to rcu_read_unlock_strict() from the CONFIG_PREEMPT=n instance of __rcu_read_unlock(). [ paulmck: Fixed bug located by kernel test robot ] Reported-by Jann Horn Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d15d46db61f7..522529a13786 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -55,6 +55,12 @@ void __rcu_read_unlock(void); #else /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TINY_RCU +#define rcu_read_unlock_strict() do { } while (0) +#else +void rcu_read_unlock_strict(void); +#endif + static inline void __rcu_read_lock(void) { preempt_disable(); @@ -63,6 +69,7 @@ static inline void __rcu_read_lock(void) static inline void __rcu_read_unlock(void) { preempt_enable(); + rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) -- cgit v1.2.3 From 00cda13e339c9f4956a6f036675df2ca5b5a552e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 14 Aug 2020 00:34:02 +0300 Subject: power: supply: Support battery temperature device-tree properties The generic battery temperature properties are already supported by the power-supply core. Let's support parsing of the common battery temperature properties from a device-tree. Signed-off-by: Dmitry Osipenko Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 97cc4b85bf61..d0684362a392 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -365,6 +365,12 @@ struct power_supply_battery_info { int constant_charge_voltage_max_uv; /* microVolts */ int factory_internal_resistance_uohm; /* microOhms */ int ocv_temp[POWER_SUPPLY_OCV_TEMP_MAX];/* celsius */ + int temp_ambient_alert_min; /* celsius */ + int temp_ambient_alert_max; /* celsius */ + int temp_alert_min; /* celsius */ + int temp_alert_max; /* celsius */ + int temp_min; /* celsius */ + int temp_max; /* celsius */ struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; struct power_supply_resistance_temp_table *resist_table; -- cgit v1.2.3 From f836a56e84ffc9f1a1cd73f77e10404ca46a4616 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:15 +0200 Subject: bpf: Generalize bpf_sk_storage Refactor the functionality in bpf_sk_storage.c so that concept of storage linked to kernel objects can be extended to other objects like inode, task_struct etc. Each new local storage will still be a separate map and provide its own set of helpers. This allows for future object specific extensions and still share a lot of the underlying implementation. This includes the changes suggested by Martin in: https://lore.kernel.org/bpf/20200725013047.4006241-1-kafai@fb.com/ adding new map operations to support bpf_local_storage maps: * storages for different kernel objects to optionally have different memory charging strategy (map_local_storage_charge, map_local_storage_uncharge) * Functionality to extract the storage pointer from a pointer to the owning object (map_owner_storage_ptr) Co-developed-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-4-kpsingh@chromium.org --- include/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 81f38e2fda78..8c443b93ac11 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -34,6 +34,8 @@ struct btf_type; struct exception_table_entry; struct seq_operations; struct bpf_iter_aux_info; +struct bpf_local_storage; +struct bpf_local_storage_map; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -104,6 +106,12 @@ struct bpf_map_ops { __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + /* Functions called by bpf_local_storage maps */ + int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, + void *owner, u32 size); + void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap, + void *owner, u32 size); + struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; -- cgit v1.2.3 From 450af8d0f6be2e7dd2a528a3fb054bb726bf1747 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:16 +0200 Subject: bpf: Split bpf_local_storage to bpf_sk_storage A purely mechanical change: bpf_sk_storage.c = bpf_sk_storage.c + bpf_local_storage.c bpf_sk_storage.h = bpf_sk_storage.h + bpf_local_storage.h Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200825182919.1118197-5-kpsingh@chromium.org --- include/linux/bpf_local_storage.h | 163 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 include/linux/bpf_local_storage.h (limited to 'include/linux') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h new file mode 100644 index 000000000000..b2c9463f36a1 --- /dev/null +++ b/include/linux/bpf_local_storage.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2019 Facebook + * Copyright 2020 Google LLC. + */ + +#ifndef _BPF_LOCAL_STORAGE_H +#define _BPF_LOCAL_STORAGE_H + +#include +#include +#include +#include +#include +#include + +#define BPF_LOCAL_STORAGE_CACHE_SIZE 16 + +struct bpf_local_storage_map_bucket { + struct hlist_head list; + raw_spinlock_t lock; +}; + +/* Thp map is not the primary owner of a bpf_local_storage_elem. + * Instead, the container object (eg. sk->sk_bpf_storage) is. + * + * The map (bpf_local_storage_map) is for two purposes + * 1. Define the size of the "local storage". It is + * the map's value_size. + * + * 2. Maintain a list to keep track of all elems such + * that they can be cleaned up during the map destruction. + * + * When a bpf local storage is being looked up for a + * particular object, the "bpf_map" pointer is actually used + * as the "key" to search in the list of elem in + * the respective bpf_local_storage owned by the object. + * + * e.g. sk->sk_bpf_storage is the mini-map with the "bpf_map" pointer + * as the searching key. + */ +struct bpf_local_storage_map { + struct bpf_map map; + /* Lookup elem does not require accessing the map. + * + * Updating/Deleting requires a bucket lock to + * link/unlink the elem from the map. Having + * multiple buckets to improve contention. + */ + struct bpf_local_storage_map_bucket *buckets; + u32 bucket_log; + u16 elem_size; + u16 cache_idx; +}; + +struct bpf_local_storage_data { + /* smap is used as the searching key when looking up + * from the object's bpf_local_storage. + * + * Put it in the same cacheline as the data to minimize + * the number of cachelines access during the cache hit case. + */ + struct bpf_local_storage_map __rcu *smap; + u8 data[] __aligned(8); +}; + +/* Linked to bpf_local_storage and bpf_local_storage_map */ +struct bpf_local_storage_elem { + struct hlist_node map_node; /* Linked to bpf_local_storage_map */ + struct hlist_node snode; /* Linked to bpf_local_storage */ + struct bpf_local_storage __rcu *local_storage; + struct rcu_head rcu; + /* 8 bytes hole */ + /* The data is stored in aother cacheline to minimize + * the number of cachelines access during a cache hit. + */ + struct bpf_local_storage_data sdata ____cacheline_aligned; +}; + +struct bpf_local_storage { + struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; + struct hlist_head list; /* List of bpf_local_storage_elem */ + void *owner; /* The object that owns the above "list" of + * bpf_local_storage_elem. + */ + struct rcu_head rcu; + raw_spinlock_t lock; /* Protect adding/removing from the "list" */ +}; + +/* U16_MAX is much more than enough for sk local storage + * considering a tcp_sock is ~2k. + */ +#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE \ + min_t(u32, \ + (KMALLOC_MAX_SIZE - MAX_BPF_STACK - \ + sizeof(struct bpf_local_storage_elem)), \ + (U16_MAX - sizeof(struct bpf_local_storage_elem))) + +#define SELEM(_SDATA) \ + container_of((_SDATA), struct bpf_local_storage_elem, sdata) +#define SDATA(_SELEM) (&(_SELEM)->sdata) + +#define BPF_LOCAL_STORAGE_CACHE_SIZE 16 + +struct bpf_local_storage_cache { + spinlock_t idx_lock; + u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE]; +}; + +#define DEFINE_BPF_STORAGE_CACHE(name) \ +static struct bpf_local_storage_cache name = { \ + .idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \ +} + +u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache); +void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, + u16 idx); + +/* Helper functions for bpf_local_storage */ +int bpf_local_storage_map_alloc_check(union bpf_attr *attr); + +struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr); + +struct bpf_local_storage_data * +bpf_local_storage_lookup(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + bool cacheit_lockit); + +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap); + +int bpf_local_storage_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type); + +void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem); + +bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem, + bool uncharge_omem); + +void bpf_selem_unlink(struct bpf_local_storage_elem *selem); + +void bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); + +void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem); + +struct bpf_local_storage_elem * +bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, + bool charge_mem); + +int +bpf_local_storage_alloc(void *owner, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *first_selem); + +struct bpf_local_storage_data * +bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, + void *value, u64 map_flags); + +#endif /* _BPF_LOCAL_STORAGE_H */ -- cgit v1.2.3 From 8ea636848aca35b9f97c5b5dee30225cf2dd0fe6 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:17 +0200 Subject: bpf: Implement bpf_local_storage for inodes Similar to bpf_local_storage for sockets, add local storage for inodes. The life-cycle of storage is managed with the life-cycle of the inode. i.e. the storage is destroyed along with the owning inode. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-6-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 29 +++++++++++++++++++++++++++++ include/linux/bpf_types.h | 3 +++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index af74712af585..aaacb6aafc87 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -17,9 +17,28 @@ #include #undef LSM_HOOK +struct bpf_storage_blob { + struct bpf_local_storage __rcu *storage; +}; + +extern struct lsm_blob_sizes bpf_lsm_blob_sizes; + int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); +static inline struct bpf_storage_blob *bpf_inode( + const struct inode *inode) +{ + if (unlikely(!inode->i_security)) + return NULL; + + return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; +} + +extern const struct bpf_func_proto bpf_inode_storage_get_proto; +extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +void bpf_inode_storage_free(struct inode *inode); + #else /* !CONFIG_BPF_LSM */ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, @@ -28,6 +47,16 @@ static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, return -EOPNOTSUPP; } +static inline struct bpf_storage_blob *bpf_inode( + const struct inode *inode) +{ + return NULL; +} + +static inline void bpf_inode_storage_free(struct inode *inode) +{ +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a52a5688418e..2e6f568377f1 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -107,6 +107,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif +#ifdef CONFIG_BPF_LSM +BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) -- cgit v1.2.3 From 6298399bfc101f8e8cf35a916f26aa32bdf04278 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:13 +0200 Subject: bpf: Move btf_resolve_size into __btf_resolve_size Moving btf_resolve_size into __btf_resolve_size and keeping btf_resolve_size public with just first 3 arguments, because the rest of the arguments are not used by outside callers. Following changes are adding more arguments, which are not useful to outside callers. They will be added to the __btf_resolve_size function. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-4-jolsa@kernel.org --- include/linux/btf.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 8b81fbb4497c..a9af5e7a7ece 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -64,8 +64,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, u32 id, u32 *res_id); const struct btf_type * btf_resolve_size(const struct btf *btf, const struct btf_type *type, - u32 *type_size, const struct btf_type **elem_type, - u32 *total_nelems); + u32 *type_size); #define for_each_member(i, struct_type, member) \ for (i = 0, member = btf_type_member(struct_type); \ -- cgit v1.2.3 From faaf4a790d93794b46d67e2fd69b8e5c8cae2d41 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:18 +0200 Subject: bpf: Add btf_struct_ids_match function Adding btf_struct_ids_match function to check if given address provided by BTF object + offset is also address of another nested BTF object. This allows to pass an argument to helper, which is defined via parent BTF object + offset, like for bpf_d_path (added in following changes): SEC("fentry/filp_close") int BPF_PROG(prog_close, struct file *file, void *id) { ... ret = bpf_d_path(&file->f_path, ... The first bpf_d_path argument is hold by verifier as BTF file object plus offset of f_path member. The btf_struct_ids_match function will walk the struct file object and check if there's nested struct path object on the given offset. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-9-jolsa@kernel.org --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8c443b93ac11..540f5e6c3788 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1358,6 +1358,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); +bool btf_struct_ids_match(struct bpf_verifier_log *log, + int off, u32 id, u32 need_type_id); int btf_resolve_helper_id(struct bpf_verifier_log *log, const struct bpf_func_proto *fn, int); -- cgit v1.2.3 From eae2e83e62633a2659e3bc690facba1c2fc9c45b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:19 +0200 Subject: bpf: Add BTF_SET_START/END macros Adding support to define sorted set of BTF ID values. Following defines sorted set of BTF ID values: BTF_SET_START(btf_allowlist_d_path) BTF_ID(func, vfs_truncate) BTF_ID(func, vfs_fallocate) BTF_ID(func, dentry_open) BTF_ID(func, vfs_getattr) BTF_ID(func, filp_close) BTF_SET_END(btf_allowlist_d_path) It defines following 'struct btf_id_set' variable to access values and count: struct btf_id_set btf_allowlist_d_path; Adding 'allowed' callback to struct bpf_func_proto, to allow verifier the check on allowed callers. Adding btf_id_set_contains function, which will be used by allowed callbacks to verify the caller's BTF ID value is within allowed set. Also removing extra '\' in __BTF_ID_LIST macro. Added BTF_SET_START_GLOBAL macro for global sets. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200825192124.710397-10-jolsa@kernel.org --- include/linux/bpf.h | 4 ++++ include/linux/btf_ids.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 540f5e6c3788..a6131d95e31e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -317,6 +317,7 @@ struct bpf_func_proto { * for this argument. */ int *ret_btf_id; /* return value btf_id */ + bool (*allowed)(const struct bpf_prog *prog); }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -1878,4 +1879,7 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +struct btf_id_set; +bool btf_id_set_contains(struct btf_id_set *set, u32 id); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 4867d549e3c1..210b086188a3 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -3,6 +3,11 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +struct btf_id_set { + u32 cnt; + u32 ids[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ @@ -62,7 +67,7 @@ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " " #name "; \n" \ #name ":; \n" \ -".popsection; \n"); \ +".popsection; \n"); #define BTF_ID_LIST(name) \ __BTF_ID_LIST(name, local) \ @@ -88,12 +93,56 @@ asm( \ ".zero 4 \n" \ ".popsection; \n"); +/* + * The BTF_SET_START/END macros pair defines sorted list of + * BTF IDs plus its members count, with following layout: + * + * BTF_SET_START(list) + * BTF_ID(type1, name1) + * BTF_ID(type2, name2) + * BTF_SET_END(list) + * + * __BTF_ID__set__list: + * .zero 4 + * list: + * __BTF_ID__type1__name1__3: + * .zero 4 + * __BTF_ID__type2__name2__4: + * .zero 4 + * + */ +#define __BTF_SET_START(name, scope) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +"." #scope " __BTF_ID__set__" #name "; \n" \ +"__BTF_ID__set__" #name ":; \n" \ +".zero 4 \n" \ +".popsection; \n"); + +#define BTF_SET_START(name) \ +__BTF_ID_LIST(name, local) \ +__BTF_SET_START(name, local) + +#define BTF_SET_START_GLOBAL(name) \ +__BTF_ID_LIST(name, globl) \ +__BTF_SET_START(name, globl) + +#define BTF_SET_END(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".size __BTF_ID__set__" #name ", .-" #name " \n" \ +".popsection; \n"); \ +extern struct btf_id_set name; + #else #define BTF_ID_LIST(name) static u32 name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From 01ccf592362a984534371b3596d4c953da6a7bb2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 19 Aug 2020 21:55:05 +0200 Subject: sched: Bring the PF_IO_WORKER and PF_WQ_WORKER bits closer together The bits PF_IO_WORKER and PF_WQ_WORKER are tested together in sched_submit_work() which is considered to be a hot path. If the two bits cross the 8 or 16 bit boundary then most architecture require multiple load instructions in order to create the constant value. Also, such a value can not be encoded within the compare opcode. By moving the bit definition within the same block, the compiler can create/use one immediate value. For some reason gcc-10 on ARM64 requires both bits to be next to each other in order to issue "tst reg, val; bne label". Otherwise the result is "mov reg1, val; tst reg, reg1; bne label". Move PF_VCPU out of the way so that PF_IO_WORKER can be next to PF_WQ_WORKER. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200819195505.y3fxk72sotnrkczi@linutronix.de --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 93ecd930efd3..2bf0af19a62a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1489,9 +1489,10 @@ extern struct pid *cad_pid; /* * Per process flags */ +#define PF_VCPU 0x00000001 /* I'm a virtual CPU */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_IO_WORKER 0x00000010 /* Task is an IO worker */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ #define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ @@ -1515,7 +1516,6 @@ extern struct pid *cad_pid; #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ -#define PF_IO_WORKER 0x20000000 /* Task is an IO worker */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ -- cgit v1.2.3 From 8fca9494d4b4d6b57b1398cd473feb308df656db Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 25 Aug 2020 14:32:15 +0100 Subject: sched/topology: Move sd_flag_debug out of linux/sched/topology.h Defining an array in a header imported all over the place clearly is a daft idea, that still didn't stop me from doing it. Leave a declaration of sd_flag_debug in topology.h and move its definition to sched/debug.c. Fixes: b6e862f38672 ("sched/topology: Define and assign sched_domain flag metadata") Reported-by: Andy Shevchenko Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200825133216.9163-1-valentin.schneider@arm.com --- include/linux/sched/topology.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 2d59ca77103e..b9b0dab4d067 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -33,14 +33,13 @@ static const unsigned int SD_DEGENERATE_GROUPS_MASK = #undef SD_FLAG #ifdef CONFIG_SCHED_DEBUG -#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, -static const struct { + +struct sd_flag_debug { unsigned int meta_flags; char *name; -} sd_flag_debug[] = { -#include }; -#undef SD_FLAG +extern const struct sd_flag_debug sd_flag_debug[]; + #endif #ifdef CONFIG_SCHED_SMT -- cgit v1.2.3 From 4fc472f1214ef75e5450f207e23ff13af6eecad4 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 25 Aug 2020 14:32:16 +0100 Subject: sched/topology: Move SD_DEGENERATE_GROUPS_MASK out of linux/sched/topology.h SD_DEGENERATE_GROUPS_MASK is only useful for sched/topology.c, but still gets defined for anyone who imports topology.h, leading to a flurry of unused variable warnings. Move it out of the header and place it next to the SD degeneration functions in sched/topology.c. Fixes: 4ee4ea443a5d ("sched/topology: Introduce SD metaflag for flags needing > 1 groups") Reported-by: Andy Shevchenko Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200825133216.9163-2-valentin.schneider@arm.com --- include/linux/sched/topology.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index b9b0dab4d067..9ef7bf686a9f 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -25,13 +25,6 @@ enum { }; #undef SD_FLAG -/* Generate a mask of SD flags with the SDF_NEEDS_GROUPS metaflag */ -#define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_NEEDS_GROUPS)) | -static const unsigned int SD_DEGENERATE_GROUPS_MASK = -#include -0; -#undef SD_FLAG - #ifdef CONFIG_SCHED_DEBUG struct sd_flag_debug { -- cgit v1.2.3 From a435b9a14356587cf512ea6473368a579373c74c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Jul 2020 13:00:57 +0200 Subject: locking/refcount: Provide __refcount API to obtain the old value David requested means to obtain the old/previous value from the refcount API for tracing purposes. Duplicate (most of) the API as __refcount*() with an additional 'int *' argument into which, if !NULL, the old value will be stored. Requested-by: David Howells Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200729111120.GA2638@hirez.programming.kicks-ass.net --- include/linux/refcount.h | 65 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 0e3ee25eb156..7fabb1af18e0 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -165,7 +165,7 @@ static inline unsigned int refcount_read(const refcount_t *r) * * Return: false if the passed refcount is 0, true otherwise */ -static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -174,12 +174,20 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) break; } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i)); + if (oldp) + *oldp = old; + if (unlikely(old < 0 || old + i < 0)) refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); return old; } +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + return __refcount_add_not_zero(i, r, NULL); +} + /** * refcount_add - add a value to a refcount * @i: the value to add to the refcount @@ -196,16 +204,24 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. */ -static inline void refcount_add(int i, refcount_t *r) +static inline void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); + if (oldp) + *oldp = old; + if (unlikely(!old)) refcount_warn_saturate(r, REFCOUNT_ADD_UAF); else if (unlikely(old < 0 || old + i < 0)) refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } +static inline void refcount_add(int i, refcount_t *r) +{ + __refcount_add(i, r, NULL); +} + /** * refcount_inc_not_zero - increment a refcount unless it is 0 * @r: the refcount to increment @@ -219,9 +235,14 @@ static inline void refcount_add(int i, refcount_t *r) * * Return: true if the increment was successful, false otherwise */ +static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero(1, r, oldp); +} + static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { - return refcount_add_not_zero(1, r); + return __refcount_inc_not_zero(r, NULL); } /** @@ -236,9 +257,14 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. */ +static inline void __refcount_inc(refcount_t *r, int *oldp) +{ + __refcount_add(1, r, oldp); +} + static inline void refcount_inc(refcount_t *r) { - refcount_add(1, r); + __refcount_inc(r, NULL); } /** @@ -261,10 +287,13 @@ static inline void refcount_inc(refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) +static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(i, &r->refs); + if (oldp) + *oldp = old; + if (old == i) { smp_acquire__after_ctrl_dep(); return true; @@ -276,6 +305,11 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) return false; } +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) +{ + return __refcount_sub_and_test(i, r, NULL); +} + /** * refcount_dec_and_test - decrement a refcount and test if it is 0 * @r: the refcount @@ -289,9 +323,14 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ +static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) +{ + return __refcount_sub_and_test(1, r, oldp); +} + static inline __must_check bool refcount_dec_and_test(refcount_t *r) { - return refcount_sub_and_test(1, r); + return __refcount_dec_and_test(r, NULL); } /** @@ -304,12 +343,22 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r) * Provides release memory ordering, such that prior loads and stores are done * before. */ -static inline void refcount_dec(refcount_t *r) +static inline void __refcount_dec(refcount_t *r, int *oldp) { - if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1)) + int old = atomic_fetch_sub_release(1, &r->refs); + + if (oldp) + *oldp = old; + + if (unlikely(old <= 1)) refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } +static inline void refcount_dec(refcount_t *r) +{ + __refcount_dec(r, NULL); +} + extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); -- cgit v1.2.3 From a28e884b966e713da29caefbb347efea77367d22 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 Aug 2020 17:02:00 -0700 Subject: seqlock: Fix multiple kernel-doc warnings Fix kernel-doc warnings in . ../include/linux/seqlock.h:152: warning: Incorrect use of kernel-doc format: * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t ../include/linux/seqlock.h:164: warning: Incorrect use of kernel-doc format: * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers ../include/linux/seqlock.h:229: warning: Function parameter or member 'seq_name' not described in 'SEQCOUNT_LOCKTYPE_ZERO' ../include/linux/seqlock.h:229: warning: Function parameter or member 'assoc_lock' not described in 'SEQCOUNT_LOCKTYPE_ZERO' ../include/linux/seqlock.h:229: warning: Excess function parameter 'name' description in 'SEQCOUNT_LOCKTYPE_ZERO' ../include/linux/seqlock.h:229: warning: Excess function parameter 'lock' description in 'SEQCOUNT_LOCKTYPE_ZERO' ../include/linux/seqlock.h:695: warning: duplicate section name 'NOTE' Demote kernel-doc notation for the macros "seqcount_LOCKNAME_init()" and "SEQCOUNT_LOCKTYPE()"; scripts/kernel-doc does not handle them correctly. Rename function parameters in SEQCNT_LOCKNAME_ZERO() documentation to match the macro's argument names. Change the macro name in the documentation to SEQCOUNT_LOCKTYPE_ZERO() to match the macro's name. For raw_write_seqcount_latch(), rename the second NOTE: to NOTE2: to prevent a kernel-doc warning. However, the generated output is not quite as nice as it could be for this. Fix a typo: s/LOCKTYPR/LOCKTYPE/ Fixes: 0efc94c5d15c ("seqcount: Compress SEQCNT_LOCKNAME_ZERO()") Fixes: e4e9ab3f9f91 ("seqlock: Fold seqcount_LOCKNAME_init() definition") Fixes: a8772dccb2ec ("seqlock: Fold seqcount_LOCKNAME_t definition") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200817000200.20993-1-rdunlap@infradead.org --- include/linux/seqlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 962d9768945f..300cbf312546 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -138,7 +138,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #endif /** - * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated + * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPE associated * @seqcount: The real sequence counter * @lock: Pointer to the associated spinlock * @@ -148,7 +148,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * that the write side critical section is properly serialized. */ -/** +/* * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t * @s: Pointer to the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated LOCKTYPE @@ -217,7 +217,7 @@ SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) -/** +/* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t * @name: Name of the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated LOCKTYPE @@ -688,7 +688,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * to miss an entire modification sequence, once it resumes it might * observe the new entry. * - * NOTE: + * NOTE2: * * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. -- cgit v1.2.3 From e918188611f073063415f40fae568fa4d86d9044 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:20 +0800 Subject: locking: More accurate annotations for read_lock() On the archs using QUEUED_RWLOCKS, read_lock() is not always a recursive read lock, actually it's only recursive if in_interrupt() is true. So change the annotation accordingly to catch more deadlocks. Note we used to treat read_lock() as pure recursive read locks in lib/locking-seftest.c, and this is useful, especially for the lockdep development selftest, so we keep this via a variable to force switching lock annotation for read_lock(). Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-2-boqun.feng@gmail.com --- include/linux/lockdep.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 6a584b3e5c74..7cae5ea00d59 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -469,6 +469,20 @@ static inline void print_irqtrace_events(struct task_struct *curr) } #endif +/* Variable used to make lockdep treat read_lock() as recursive in selftests */ +#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS +extern unsigned int force_read_lock_recursive; +#else /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */ +#define force_read_lock_recursive 0 +#endif /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */ + +#ifdef CONFIG_LOCKDEP +extern bool read_lock_is_recursive(void); +#else /* CONFIG_LOCKDEP */ +/* If !LOCKDEP, the value is meaningless */ +#define read_lock_is_recursive() 0 +#endif + /* * For trivial one-depth nesting of a lock-class, the following * global define can be used. (Subsystems with multiple levels @@ -490,7 +504,14 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define spin_release(l, i) lock_release(l, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) -#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define rwlock_acquire_read(l, s, t, i) \ +do { \ + if (read_lock_is_recursive()) \ + lock_acquire_shared_recursive(l, s, t, NULL, i); \ + else \ + lock_acquire_shared(l, s, t, NULL, i); \ +} while (0) + #define rwlock_release(l, i) lock_release(l, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) -- cgit v1.2.3 From bd76eca10de2eb9998d5125b08e8997cbf5508d5 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:24 +0800 Subject: lockdep: Reduce the size of lock_list::distance lock_list::distance is always not greater than MAX_LOCK_DEPTH (which is 48 right now), so a u16 will fit. This patch reduces the size of lock_list::distance to save space, so that we can introduce other fields to help detect recursive read lock deadlocks without increasing the size of lock_list structure. Suggested-by: Peter Zijlstra Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-6-boqun.feng@gmail.com --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 7cae5ea00d59..22750102b5fe 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -54,7 +54,7 @@ struct lock_list { struct lock_class *class; struct lock_class *links_to; const struct lock_trace *trace; - int distance; + u16 distance; /* * The parent field is used to implement breadth-first search, and the -- cgit v1.2.3 From 3454a36d6a39186de508dd43df590a6363364176 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:25 +0800 Subject: lockdep: Introduce lock_list::dep To add recursive read locks into the dependency graph, we need to store the types of dependencies for the BFS later. There are four types of dependencies: * Exclusive -> Non-recursive dependencies: EN e.g. write_lock(prev) held and try to acquire write_lock(next) or non-recursive read_lock(next), which can be represented as "prev -(EN)-> next" * Shared -> Non-recursive dependencies: SN e.g. read_lock(prev) held and try to acquire write_lock(next) or non-recursive read_lock(next), which can be represented as "prev -(SN)-> next" * Exclusive -> Recursive dependencies: ER e.g. write_lock(prev) held and try to acquire recursive read_lock(next), which can be represented as "prev -(ER)-> next" * Shared -> Recursive dependencies: SR e.g. read_lock(prev) held and try to acquire recursive read_lock(next), which can be represented as "prev -(SR)-> next" So we use 4 bits for the presence of each type in lock_list::dep. Helper functions and macros are also introduced to convert a pair of locks into lock_list::dep bit and maintain the addition of different types of dependencies. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-7-boqun.feng@gmail.com --- include/linux/lockdep.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 22750102b5fe..35c8bb0108dd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -55,6 +55,8 @@ struct lock_list { struct lock_class *links_to; const struct lock_trace *trace; u16 distance; + /* bitmap of different dependencies from head to this */ + u8 dep; /* * The parent field is used to implement breadth-first search, and the -- cgit v1.2.3 From 6971c0f345620aae5e6172207a57b7524603a34e Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:26 +0800 Subject: lockdep: Extend __bfs() to work with multiple types of dependencies Now we have four types of dependencies in the dependency graph, and not all the pathes carry real dependencies (the dependencies that may cause a deadlock), for example: Given lock A and B, if we have: CPU1 CPU2 ============= ============== write_lock(A); read_lock(B); read_lock(B); write_lock(A); (assuming read_lock(B) is a recursive reader) then we have dependencies A -(ER)-> B, and B -(SN)-> A, and a dependency path A -(ER)-> B -(SN)-> A. In lockdep w/o recursive locks, a dependency path from A to A means a deadlock. However, the above case is obviously not a deadlock, because no one holds B exclusively, therefore no one waits for the other to release B, so who get A first in CPU1 and CPU2 will run non-blockingly. As a result, dependency path A -(ER)-> B -(SN)-> A is not a real/strong dependency that could cause a deadlock. From the observation above, we know that for a dependency path to be real/strong, no two adjacent dependencies can be as -(*R)-> -(S*)->. Now our mission is to make __bfs() traverse only the strong dependency paths, which is simple: we record whether we only have -(*R)-> for the previous lock_list of the path in lock_list::only_xr, and when we pick a dependency in the traverse, we 1) filter out -(S*)-> dependency if the previous lock_list only has -(*R)-> dependency (i.e. ->only_xr is true) and 2) set the next lock_list::only_xr to true if we only have -(*R)-> left after we filter out dependencies based on 1), otherwise, set it to false. With this extension for __bfs(), we now need to initialize the root of __bfs() properly (with a correct ->only_xr), to do so, we introduce some helper functions, which also cleans up a little bit for the __bfs() root initialization code. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-8-boqun.feng@gmail.com --- include/linux/lockdep.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 35c8bb0108dd..57d642d378c7 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -57,6 +57,8 @@ struct lock_list { u16 distance; /* bitmap of different dependencies from head to this */ u8 dep; + /* used by BFS to record whether "prev -> this" only has -(*R)-> */ + u8 only_xr; /* * The parent field is used to implement breadth-first search, and the -- cgit v1.2.3 From a2bee00cccf4f2a80412d38328b92f448fd66935 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Tue, 25 Aug 2020 22:17:43 +0200 Subject: spi: pxa2xx: Add SSC2 and SSPSP2 SSP registers Update list of SSP registers with SSC2 and SSPSP2. These registers are utilized by LPT/WPT AudioDSP architecture. While SSC2 shares the same offset (0x40) as SSACDD, description of this register for SSP device present on mentioned AudioDSP is different so define separate constant to avoid any ambiguity. Cc: Andy Shevchenko Signed-off-by: Cezary Rojewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200825201743.4926-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 6facf27865f9..1608c760fe91 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -186,6 +186,10 @@ #define SSIRF 0x48 /* RX FIFO trigger level */ #define SSIRF_RxThresh(x) ((x) - 1) +/* LPT/WPT SSP */ +#define SSCR2 (0x40) /* SSP Command / Status 2 */ +#define SSPSP2 (0x44) /* SSP Programmable Serial Protocol 2 */ + enum pxa_ssp_type { SSP_UNDEFINED = 0, PXA25x_SSP, /* pxa 210, 250, 255, 26x */ -- cgit v1.2.3 From 24da79902efc4a8443fae09e6c8e25b515bd8db2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Aug 2020 06:50:16 -0700 Subject: inet: remove inet_sk_copy_descendant() This is no longer used, SCTP now uses a private helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a44789d027cc..bac8f4fffbd6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -345,17 +345,6 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return (struct raw6_sock *)sk; } -static inline void inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from) -{ - int ancestor_size = sizeof(struct inet_sock); - - if (sk_from->sk_family == PF_INET6) - ancestor_size += sizeof(struct ipv6_pinfo); - - __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); -} - #define __ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_only_sock(sk) (__ipv6_only_sock(sk)) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ -- cgit v1.2.3 From 5ca937fb5d6870735341d8fdacdd2b49618c35dc Mon Sep 17 00:00:00 2001 From: Subbaraman Narayanamurthy Date: Thu, 13 Aug 2020 11:34:08 -0700 Subject: power: supply: add wireless type Currently, power_supply framework supports only Battery, UPS, Mains and USB power_supply_type. Add wireless power_supply_type so that the drivers which supports wireless can register a power supply class device with POWER_SUPPLY_TYPE_WIRELESS. Signed-off-by: Subbaraman Narayanamurthy Signed-off-by: Guru Das Srinagesh Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index d0684362a392..81a55e974feb 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -186,6 +186,7 @@ enum power_supply_type { POWER_SUPPLY_TYPE_USB_PD, /* Power Delivery Port */ POWER_SUPPLY_TYPE_USB_PD_DRP, /* PD Dual Role Port */ POWER_SUPPLY_TYPE_APPLE_BRICK_ID, /* Apple Charging Method */ + POWER_SUPPLY_TYPE_WIRELESS, /* Wireless */ }; enum power_supply_usb_type { -- cgit v1.2.3 From 7f9fb67358a2bcaacbdfeee12e0f19e98c8bdf55 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Wed, 19 Aug 2020 15:34:56 +0800 Subject: regmap: add Intel SPI Slave to AVMM Bus Bridge support This patch add support for regmap APIs that are intended to be used by the drivers of some SPI slave chips which integrate the "SPI slave to Avalon Master Bridge" (spi-avmm) IP. The spi-avmm IP acts as a bridge to convert encoded streams of bytes from the host to the chip's internal register read/write on Avalon bus. The driver implements the register read/write operations for a generic SPI master to access the sub devices behind spi-avmm bridge. Signed-off-by: Xu Yilun Signed-off-by: Wu Hao Signed-off-by: Matthew Gerlach Signed-off-by: Russ Weight Reviewed-by: Tom Rix Reviewed-by: Luis Claudio R. Goncalves Link: https://lore.kernel.org/r/1597822497-25107-2-git-send-email-yilun.xu@intel.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 1970ed59d49f..d865d8fea535 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -567,6 +567,10 @@ struct regmap *__regmap_init_sdw(struct sdw_slave *sdw, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_spi_avmm(struct spi_device *spi, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__devm_regmap_init(struct device *dev, const struct regmap_bus *bus, @@ -620,6 +624,10 @@ struct regmap *__devm_regmap_init_i3c(struct i3c_device *i3c, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_spi_avmm(struct spi_device *spi, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); /* * Wrapper for regmap_init macros to include a unique lockdep key and name * for each call. No-op if CONFIG_LOCKDEP is not set. @@ -806,6 +814,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__regmap_init_sdw, #config, \ sdw, config) +/** + * regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave + * to AVMM Bus Bridge + * + * @spi: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. + */ +#define regmap_init_spi_avmm(spi, config) \ + __regmap_lockdep_wrapper(__regmap_init_spi_avmm, #config, \ + spi, config) /** * devm_regmap_init() - Initialise managed register map @@ -993,6 +1014,21 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_i3c, #config, \ i3c, config) +/** + * devm_regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave + * to AVMM Bus Bridge + * + * @spi: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The map will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_spi_avmm(spi, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_spi_avmm, #config, \ + spi, config) + int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk); void regmap_mmio_detach_clk(struct regmap *map); void regmap_exit(struct regmap *map); -- cgit v1.2.3 From e7aaf8748897d88fd1d17bfa461df84cf233d5a9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Aug 2020 18:14:53 +0300 Subject: spi: pxa2xx: Update header block in pxa2xx_ssp.h We have direct users of some headers that are missed and have header included when forward declarations are enough. Update header block in pxa2xx_ssp.h to align with actual usage. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200826151455.55970-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 1608c760fe91..ae65fe635934 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -16,10 +16,15 @@ #ifndef __LINUX_SSP_H #define __LINUX_SSP_H -#include +#include #include -#include +#include +#include +#include +struct clk; +struct device; +struct device_node; /* * SSP Serial Port Registers -- cgit v1.2.3 From 410f4cf79f64b1831e207b89f3c7ab08e36aa646 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Aug 2020 18:14:54 +0300 Subject: spi: pxa2xx: Switch to use BIT() and GENMASK() in pxa2xx_ssp.h Switch pxa2xx_ssp.h header to use BIT() and GENMASK(). It's better to read and understand. While here, correct ordering of some definitions. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200826151455.55970-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 150 ++++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index ae65fe635934..3f0f275bd630 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -16,6 +16,7 @@ #ifndef __LINUX_SSP_H #define __LINUX_SSP_H +#include #include #include #include @@ -48,130 +49,127 @@ struct device_node; #define SSACDD (0x40) /* SSP Audio Clock Dither Divider */ /* Common PXA2xx bits first */ -#define SSCR0_DSS (0x0000000f) /* Data Size Select (mask) */ +#define SSCR0_DSS GENMASK(3, 0) /* Data Size Select (mask) */ #define SSCR0_DataSize(x) ((x) - 1) /* Data Size Select [4..16] */ -#define SSCR0_FRF (0x00000030) /* FRame Format (mask) */ +#define SSCR0_FRF GENMASK(5, 4) /* FRame Format (mask) */ #define SSCR0_Motorola (0x0 << 4) /* Motorola's Serial Peripheral Interface (SPI) */ #define SSCR0_TI (0x1 << 4) /* Texas Instruments' Synchronous Serial Protocol (SSP) */ #define SSCR0_National (0x2 << 4) /* National Microwire */ -#define SSCR0_ECS (1 << 6) /* External clock select */ -#define SSCR0_SSE (1 << 7) /* Synchronous Serial Port Enable */ +#define SSCR0_ECS BIT(6) /* External clock select */ +#define SSCR0_SSE BIT(7) /* Synchronous Serial Port Enable */ #define SSCR0_SCR(x) ((x) << 8) /* Serial Clock Rate (mask) */ /* PXA27x, PXA3xx */ -#define SSCR0_EDSS (1 << 20) /* Extended data size select */ -#define SSCR0_NCS (1 << 21) /* Network clock select */ -#define SSCR0_RIM (1 << 22) /* Receive FIFO overrrun interrupt mask */ -#define SSCR0_TUM (1 << 23) /* Transmit FIFO underrun interrupt mask */ -#define SSCR0_FRDC (0x07000000) /* Frame rate divider control (mask) */ +#define SSCR0_EDSS BIT(20) /* Extended data size select */ +#define SSCR0_NCS BIT(21) /* Network clock select */ +#define SSCR0_RIM BIT(22) /* Receive FIFO overrrun interrupt mask */ +#define SSCR0_TUM BIT(23) /* Transmit FIFO underrun interrupt mask */ +#define SSCR0_FRDC GENMASK(26, 24) /* Frame rate divider control (mask) */ #define SSCR0_SlotsPerFrm(x) (((x) - 1) << 24) /* Time slots per frame [1..8] */ -#define SSCR0_FPCKE (1 << 29) /* FIFO packing enable */ -#define SSCR0_ACS (1 << 30) /* Audio clock select */ -#define SSCR0_MOD (1 << 31) /* Mode (normal or network) */ - - -#define SSCR1_RIE (1 << 0) /* Receive FIFO Interrupt Enable */ -#define SSCR1_TIE (1 << 1) /* Transmit FIFO Interrupt Enable */ -#define SSCR1_LBM (1 << 2) /* Loop-Back Mode */ -#define SSCR1_SPO (1 << 3) /* Motorola SPI SSPSCLK polarity setting */ -#define SSCR1_SPH (1 << 4) /* Motorola SPI SSPSCLK phase setting */ -#define SSCR1_MWDS (1 << 5) /* Microwire Transmit Data Size */ - -#define SSSR_ALT_FRM_MASK 3 /* Masks the SFRM signal number */ -#define SSSR_TNF (1 << 2) /* Transmit FIFO Not Full */ -#define SSSR_RNE (1 << 3) /* Receive FIFO Not Empty */ -#define SSSR_BSY (1 << 4) /* SSP Busy */ -#define SSSR_TFS (1 << 5) /* Transmit FIFO Service Request */ -#define SSSR_RFS (1 << 6) /* Receive FIFO Service Request */ -#define SSSR_ROR (1 << 7) /* Receive FIFO Overrun */ +#define SSCR0_FPCKE BIT(29) /* FIFO packing enable */ +#define SSCR0_ACS BIT(30) /* Audio clock select */ +#define SSCR0_MOD BIT(31) /* Mode (normal or network) */ + +#define SSCR1_RIE BIT(0) /* Receive FIFO Interrupt Enable */ +#define SSCR1_TIE BIT(1) /* Transmit FIFO Interrupt Enable */ +#define SSCR1_LBM BIT(2) /* Loop-Back Mode */ +#define SSCR1_SPO BIT(3) /* Motorola SPI SSPSCLK polarity setting */ +#define SSCR1_SPH BIT(4) /* Motorola SPI SSPSCLK phase setting */ +#define SSCR1_MWDS BIT(5) /* Microwire Transmit Data Size */ + +#define SSSR_ALT_FRM_MASK GENMASK(1, 0) /* Masks the SFRM signal number */ +#define SSSR_TNF BIT(2) /* Transmit FIFO Not Full */ +#define SSSR_RNE BIT(3) /* Receive FIFO Not Empty */ +#define SSSR_BSY BIT(4) /* SSP Busy */ +#define SSSR_TFS BIT(5) /* Transmit FIFO Service Request */ +#define SSSR_RFS BIT(6) /* Receive FIFO Service Request */ +#define SSSR_ROR BIT(7) /* Receive FIFO Overrun */ #define RX_THRESH_DFLT 8 #define TX_THRESH_DFLT 8 -#define SSSR_TFL_MASK (0xf << 8) /* Transmit FIFO Level mask */ -#define SSSR_RFL_MASK (0xf << 12) /* Receive FIFO Level mask */ +#define SSSR_TFL_MASK GENMASK(11, 8) /* Transmit FIFO Level mask */ +#define SSSR_RFL_MASK GENMASK(15, 12) /* Receive FIFO Level mask */ -#define SSCR1_TFT (0x000003c0) /* Transmit FIFO Threshold (mask) */ +#define SSCR1_TFT GENMASK(9, 6) /* Transmit FIFO Threshold (mask) */ #define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */ -#define SSCR1_RFT (0x00003c00) /* Receive FIFO Threshold (mask) */ +#define SSCR1_RFT GENMASK(13, 10) /* Receive FIFO Threshold (mask) */ #define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */ #define RX_THRESH_CE4100_DFLT 2 #define TX_THRESH_CE4100_DFLT 2 -#define CE4100_SSSR_TFL_MASK (0x3 << 8) /* Transmit FIFO Level mask */ -#define CE4100_SSSR_RFL_MASK (0x3 << 12) /* Receive FIFO Level mask */ +#define CE4100_SSSR_TFL_MASK GENMASK(9, 8) /* Transmit FIFO Level mask */ +#define CE4100_SSSR_RFL_MASK GENMASK(13, 12) /* Receive FIFO Level mask */ -#define CE4100_SSCR1_TFT (0x000000c0) /* Transmit FIFO Threshold (mask) */ +#define CE4100_SSCR1_TFT GENMASK(7, 6) /* Transmit FIFO Threshold (mask) */ #define CE4100_SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..4] */ -#define CE4100_SSCR1_RFT (0x00000c00) /* Receive FIFO Threshold (mask) */ +#define CE4100_SSCR1_RFT GENMASK(11, 10) /* Receive FIFO Threshold (mask) */ #define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..4] */ /* QUARK_X1000 SSCR0 bit definition */ -#define QUARK_X1000_SSCR0_DSS (0x1F << 0) /* Data Size Select (mask) */ +#define QUARK_X1000_SSCR0_DSS GENMASK(4, 0) /* Data Size Select (mask) */ #define QUARK_X1000_SSCR0_DataSize(x) ((x) - 1) /* Data Size Select [4..32] */ -#define QUARK_X1000_SSCR0_FRF (0x3 << 5) /* FRame Format (mask) */ +#define QUARK_X1000_SSCR0_FRF GENMASK(6, 5) /* FRame Format (mask) */ #define QUARK_X1000_SSCR0_Motorola (0x0 << 5) /* Motorola's Serial Peripheral Interface (SPI) */ #define RX_THRESH_QUARK_X1000_DFLT 1 #define TX_THRESH_QUARK_X1000_DFLT 16 -#define QUARK_X1000_SSSR_TFL_MASK (0x1F << 8) /* Transmit FIFO Level mask */ -#define QUARK_X1000_SSSR_RFL_MASK (0x1F << 13) /* Receive FIFO Level mask */ +#define QUARK_X1000_SSSR_TFL_MASK GENMASK(12, 8) /* Transmit FIFO Level mask */ +#define QUARK_X1000_SSSR_RFL_MASK GENMASK(17, 13) /* Receive FIFO Level mask */ -#define QUARK_X1000_SSCR1_TFT (0x1F << 6) /* Transmit FIFO Threshold (mask) */ +#define QUARK_X1000_SSCR1_TFT GENMASK(10, 6) /* Transmit FIFO Threshold (mask) */ #define QUARK_X1000_SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..32] */ -#define QUARK_X1000_SSCR1_RFT (0x1F << 11) /* Receive FIFO Threshold (mask) */ +#define QUARK_X1000_SSCR1_RFT GENMASK(15, 11) /* Receive FIFO Threshold (mask) */ #define QUARK_X1000_SSCR1_RxTresh(x) (((x) - 1) << 11) /* level [1..32] */ -#define QUARK_X1000_SSCR1_STRF (1 << 17) /* Select FIFO or EFWR */ -#define QUARK_X1000_SSCR1_EFWR (1 << 16) /* Enable FIFO Write/Read */ +#define QUARK_X1000_SSCR1_EFWR BIT(16) /* Enable FIFO Write/Read */ +#define QUARK_X1000_SSCR1_STRF BIT(17) /* Select FIFO or EFWR */ /* extra bits in PXA255, PXA26x and PXA27x SSP ports */ #define SSCR0_TISSP (1 << 4) /* TI Sync Serial Protocol */ #define SSCR0_PSP (3 << 4) /* PSP - Programmable Serial Protocol */ -#define SSCR1_TTELP (1 << 31) /* TXD Tristate Enable Last Phase */ -#define SSCR1_TTE (1 << 30) /* TXD Tristate Enable */ -#define SSCR1_EBCEI (1 << 29) /* Enable Bit Count Error interrupt */ -#define SSCR1_SCFR (1 << 28) /* Slave Clock free Running */ -#define SSCR1_ECRA (1 << 27) /* Enable Clock Request A */ -#define SSCR1_ECRB (1 << 26) /* Enable Clock request B */ -#define SSCR1_SCLKDIR (1 << 25) /* Serial Bit Rate Clock Direction */ -#define SSCR1_SFRMDIR (1 << 24) /* Frame Direction */ -#define SSCR1_RWOT (1 << 23) /* Receive Without Transmit */ -#define SSCR1_TRAIL (1 << 22) /* Trailing Byte */ -#define SSCR1_TSRE (1 << 21) /* Transmit Service Request Enable */ -#define SSCR1_RSRE (1 << 20) /* Receive Service Request Enable */ -#define SSCR1_TINTE (1 << 19) /* Receiver Time-out Interrupt enable */ -#define SSCR1_PINTE (1 << 18) /* Peripheral Trailing Byte Interrupt Enable */ -#define SSCR1_IFS (1 << 16) /* Invert Frame Signal */ -#define SSCR1_STRF (1 << 15) /* Select FIFO or EFWR */ -#define SSCR1_EFWR (1 << 14) /* Enable FIFO Write/Read */ - -#define SSSR_BCE (1 << 23) /* Bit Count Error */ -#define SSSR_CSS (1 << 22) /* Clock Synchronisation Status */ -#define SSSR_TUR (1 << 21) /* Transmit FIFO Under Run */ -#define SSSR_EOC (1 << 20) /* End Of Chain */ -#define SSSR_TINT (1 << 19) /* Receiver Time-out Interrupt */ -#define SSSR_PINT (1 << 18) /* Peripheral Trailing Byte Interrupt */ +#define SSCR1_EFWR BIT(14) /* Enable FIFO Write/Read */ +#define SSCR1_STRF BIT(15) /* Select FIFO or EFWR */ +#define SSCR1_IFS BIT(16) /* Invert Frame Signal */ +#define SSCR1_PINTE BIT(18) /* Peripheral Trailing Byte Interrupt Enable */ +#define SSCR1_TINTE BIT(19) /* Receiver Time-out Interrupt enable */ +#define SSCR1_RSRE BIT(20) /* Receive Service Request Enable */ +#define SSCR1_TSRE BIT(21) /* Transmit Service Request Enable */ +#define SSCR1_TRAIL BIT(22) /* Trailing Byte */ +#define SSCR1_RWOT BIT(23) /* Receive Without Transmit */ +#define SSCR1_SFRMDIR BIT(24) /* Frame Direction */ +#define SSCR1_SCLKDIR BIT(25) /* Serial Bit Rate Clock Direction */ +#define SSCR1_ECRB BIT(26) /* Enable Clock request B */ +#define SSCR1_ECRA BIT(27) /* Enable Clock Request A */ +#define SSCR1_SCFR BIT(28) /* Slave Clock free Running */ +#define SSCR1_EBCEI BIT(29) /* Enable Bit Count Error interrupt */ +#define SSCR1_TTE BIT(30) /* TXD Tristate Enable */ +#define SSCR1_TTELP BIT(31) /* TXD Tristate Enable Last Phase */ + +#define SSSR_PINT BIT(18) /* Peripheral Trailing Byte Interrupt */ +#define SSSR_TINT BIT(19) /* Receiver Time-out Interrupt */ +#define SSSR_EOC BIT(20) /* End Of Chain */ +#define SSSR_TUR BIT(21) /* Transmit FIFO Under Run */ +#define SSSR_CSS BIT(22) /* Clock Synchronisation Status */ +#define SSSR_BCE BIT(23) /* Bit Count Error */ #define SSPSP_SCMODE(x) ((x) << 0) /* Serial Bit Rate Clock Mode */ -#define SSPSP_SFRMP (1 << 2) /* Serial Frame Polarity */ -#define SSPSP_ETDS (1 << 3) /* End of Transfer data State */ +#define SSPSP_SFRMP BIT(2) /* Serial Frame Polarity */ +#define SSPSP_ETDS BIT(3) /* End of Transfer data State */ #define SSPSP_STRTDLY(x) ((x) << 4) /* Start Delay */ #define SSPSP_DMYSTRT(x) ((x) << 7) /* Dummy Start */ #define SSPSP_SFRMDLY(x) ((x) << 9) /* Serial Frame Delay */ #define SSPSP_SFRMWDTH(x) ((x) << 16) /* Serial Frame Width */ #define SSPSP_DMYSTOP(x) ((x) << 23) /* Dummy Stop */ -#define SSPSP_FSRT (1 << 25) /* Frame Sync Relative Timing */ +#define SSPSP_FSRT BIT(25) /* Frame Sync Relative Timing */ /* PXA3xx */ #define SSPSP_EDMYSTRT(x) ((x) << 26) /* Extended Dummy Start */ #define SSPSP_EDMYSTOP(x) ((x) << 28) /* Extended Dummy Stop */ #define SSPSP_TIMING_MASK (0x7f8001f0) -#define SSACD_SCDB (1 << 3) /* SSPSYSCLK Divider Bypass */ -#define SSACD_ACPS(x) ((x) << 4) /* Audio clock PLL select */ #define SSACD_ACDS(x) ((x) << 0) /* Audio clock divider select */ #define SSACD_ACDS_1 (0) #define SSACD_ACDS_2 (1) @@ -179,14 +177,16 @@ struct device_node; #define SSACD_ACDS_8 (3) #define SSACD_ACDS_16 (4) #define SSACD_ACDS_32 (5) +#define SSACD_SCDB BIT(3) /* SSPSYSCLK Divider Bypass */ #define SSACD_SCDB_4X (0) #define SSACD_SCDB_1X (1) -#define SSACD_SCDX8 (1 << 7) /* SYSCLK division ratio select */ +#define SSACD_ACPS(x) ((x) << 4) /* Audio clock PLL select */ +#define SSACD_SCDX8 BIT(7) /* SYSCLK division ratio select */ /* LPSS SSP */ #define SSITF 0x44 /* TX FIFO trigger level */ +#define SSITF_TxHiThresh(x) (((x) - 1) << 0) #define SSITF_TxLoThresh(x) (((x) - 1) << 8) -#define SSITF_TxHiThresh(x) ((x) - 1) #define SSIRF 0x48 /* RX FIFO trigger level */ #define SSIRF_RxThresh(x) ((x) - 1) -- cgit v1.2.3 From 3a2fd4011a1ecec361498301a27d79d5fef255de Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Aug 2020 18:14:55 +0300 Subject: spi: pxa2xx: Drop useless comment in the pxa2xx_ssp.h No need to have file name inside file. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200826151455.55970-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 3f0f275bd630..7f73b26ed22e 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * pxa2xx_ssp.h - * * Copyright (C) 2003 Russell King, All Rights Reserved. * * This driver supports the following PXA CPU/SSP ports:- -- cgit v1.2.3 From f468f21b7af0fa472ff8ff70f10b9b4995ef7eb3 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 26 Aug 2020 15:54:16 +0300 Subject: net: Take common prefetch code structure into a function Many device drivers use the same prefetch code structure to deal with small L1 cacheline size. Take this code into a function and call it from the drivers. Suggested-by: Jakub Kicinski Signed-off-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0e303f6603f..b8abe1d7aa0b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2193,6 +2193,22 @@ int netdev_get_num_tc(struct net_device *dev) return dev->num_tc; } +static inline void net_prefetch(void *p) +{ + prefetch(p); +#if L1_CACHE_BYTES < 128 + prefetch((u8 *)p + L1_CACHE_BYTES); +#endif +} + +static inline void net_prefetchw(void *p) +{ + prefetchw(p); +#if L1_CACHE_BYTES < 128 + prefetchw((u8 *)p + L1_CACHE_BYTES); +#endif +} + void netdev_unbind_sb_channel(struct net_device *dev, struct net_device *sb_dev); int netdev_bind_sb_channel_queue(struct net_device *dev, -- cgit v1.2.3 From f9f890ba2b13ea9ccfffd0e7354c7b64d9109790 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 30 Jul 2020 18:28:01 +0300 Subject: gpio: dwapb: Add max GPIOs macro Add a new macro DWAPB_MAX_GPIOS which defines the maximum possible number of GPIO lines corresponding to the maximum DW APB GPIO controller port width. Use the new macro instead of number literal 32 where it's applicable. Suggested-by: Andy Shevchenko Signed-off-by: Serge Semin Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200730152808.2955-5-Sergey.Semin@baikalelectronics.ru Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-dwapb.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-dwapb.h b/include/linux/platform_data/gpio-dwapb.h index ff1be737bad6..0aa5c6720259 100644 --- a/include/linux/platform_data/gpio-dwapb.h +++ b/include/linux/platform_data/gpio-dwapb.h @@ -6,12 +6,14 @@ #ifndef GPIO_DW_APB_H #define GPIO_DW_APB_H +#define DWAPB_MAX_GPIOS 32 + struct dwapb_port_property { struct fwnode_handle *fwnode; unsigned int idx; unsigned int ngpio; unsigned int gpio_base; - int irq[32]; + int irq[DWAPB_MAX_GPIOS]; bool irq_shared; }; -- cgit v1.2.3 From 2da45b8f069644604e8e05ccb03b2b66ada611d5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:49 +0200 Subject: mtd: rawnand: Add a kernel doc to the ECC algorithm enumeration Before moving it to the generic raw NAND core, ensure the enumeration is properly described. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-2-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index a725b620aca2..1495f22b60cb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -92,6 +92,13 @@ enum nand_ecc_mode { NAND_ECC_ON_DIE, }; +/** + * enum nand_ecc_algo - NAND ECC algorithm + * @NAND_ECC_UNKNOWN: Unknown algorithm + * @NAND_ECC_HAMMING: Hamming algorithm + * @NAND_ECC_BCH: Bose-Chaudhuri-Hocquenghem algorithm + * @NAND_ECC_RS: Reed-Solomon algorithm + */ enum nand_ecc_algo { NAND_ECC_UNKNOWN, NAND_ECC_HAMMING, -- cgit v1.2.3 From e0a564ae0a4bc1bcf156d468955b27d3606e8253 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:50 +0200 Subject: mtd: rawnand: Rename the ECC algorithm enumeration items NAND_ECC_ is not a meaningful prefix, use NAND_ECC_ALGO_ instead. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-3-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 1495f22b60cb..8174c0c331a1 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -94,16 +94,16 @@ enum nand_ecc_mode { /** * enum nand_ecc_algo - NAND ECC algorithm - * @NAND_ECC_UNKNOWN: Unknown algorithm - * @NAND_ECC_HAMMING: Hamming algorithm - * @NAND_ECC_BCH: Bose-Chaudhuri-Hocquenghem algorithm - * @NAND_ECC_RS: Reed-Solomon algorithm + * @NAND_ECC_ALGO_UNKNOWN: Unknown algorithm + * @NAND_ECC_ALGO_HAMMING: Hamming algorithm + * @NAND_ECC_ALGO_BCH: Bose-Chaudhuri-Hocquenghem algorithm + * @NAND_ECC_ALGO_RS: Reed-Solomon algorithm */ enum nand_ecc_algo { - NAND_ECC_UNKNOWN, - NAND_ECC_HAMMING, - NAND_ECC_BCH, - NAND_ECC_RS, + NAND_ECC_ALGO_UNKNOWN, + NAND_ECC_ALGO_HAMMING, + NAND_ECC_ALGO_BCH, + NAND_ECC_ALGO_RS, }; /* -- cgit v1.2.3 From f2f64c1e924131878179da64794d9cb18ee5c827 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:51 +0200 Subject: mtd: rawnand: Move the nand_ecc_algo enum to the generic NAND layer This enumeration is generic and will be reused NAND-wide. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-4-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 14 ++++++++++++++ include/linux/mtd/rawnand.h | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index af99041ceaa9..986c7de83326 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -115,6 +115,20 @@ struct nand_page_io_req { int mode; }; +/** + * enum nand_ecc_algo - NAND ECC algorithm + * @NAND_ECC_ALGO_UNKNOWN: Unknown algorithm + * @NAND_ECC_ALGO_HAMMING: Hamming algorithm + * @NAND_ECC_ALGO_BCH: Bose-Chaudhuri-Hocquenghem algorithm + * @NAND_ECC_ALGO_RS: Reed-Solomon algorithm + */ +enum nand_ecc_algo { + NAND_ECC_ALGO_UNKNOWN, + NAND_ECC_ALGO_HAMMING, + NAND_ECC_ALGO_BCH, + NAND_ECC_ALGO_RS, +}; + /** * struct nand_ecc_props - NAND ECC properties * @strength: ECC strength diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 8174c0c331a1..10bbfbf4ad7f 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -92,20 +92,6 @@ enum nand_ecc_mode { NAND_ECC_ON_DIE, }; -/** - * enum nand_ecc_algo - NAND ECC algorithm - * @NAND_ECC_ALGO_UNKNOWN: Unknown algorithm - * @NAND_ECC_ALGO_HAMMING: Hamming algorithm - * @NAND_ECC_ALGO_BCH: Bose-Chaudhuri-Hocquenghem algorithm - * @NAND_ECC_ALGO_RS: Reed-Solomon algorithm - */ -enum nand_ecc_algo { - NAND_ECC_ALGO_UNKNOWN, - NAND_ECC_ALGO_HAMMING, - NAND_ECC_ALGO_BCH, - NAND_ECC_ALGO_RS, -}; - /* * Constants for Hardware ECC */ -- cgit v1.2.3 From 701981cab01696584a12e5f0e7c2ad931a326059 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:52 +0200 Subject: mtd: nand: Add a NAND page I/O request type Use an enum to differentiate the type of I/O (reading or writing a page). Also update the request iterator. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-5-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 986c7de83326..e754a6fc8a4b 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -82,8 +82,19 @@ struct nand_pos { unsigned int page; }; +/** + * enum nand_page_io_req_type - Direction of an I/O request + * @NAND_PAGE_READ: from the chip, to the controller + * @NAND_PAGE_WRITE: from the controller, to the chip + */ +enum nand_page_io_req_type { + NAND_PAGE_READ = 0, + NAND_PAGE_WRITE, +}; + /** * struct nand_page_io_req - NAND I/O request object + * @type: the type of page I/O: read or write * @pos: the position this I/O request is targeting * @dataoffs: the offset within the page * @datalen: number of data bytes to read from/write to this page @@ -99,6 +110,7 @@ struct nand_pos { * specific commands/operations. */ struct nand_page_io_req { + enum nand_page_io_req_type type; struct nand_pos pos; unsigned int dataoffs; unsigned int datalen; @@ -638,11 +650,13 @@ static inline void nanddev_pos_next_page(struct nand_device *nand, * layer. */ static inline void nanddev_io_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, loff_t offs, struct mtd_oob_ops *req, struct nand_io_iter *iter) { struct mtd_info *mtd = nanddev_to_mtd(nand); + iter->req.type = reqtype; iter->req.mode = req->mode; iter->req.dataoffs = nanddev_offs_to_pos(nand, offs, &iter->req.pos); iter->req.ooboffs = req->ooboffs; @@ -712,8 +726,8 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, * * Should be used for iterate over pages that are contained in an MTD request. */ -#define nanddev_io_for_each_page(nand, start, req, iter) \ - for (nanddev_io_iter_init(nand, start, req, iter); \ +#define nanddev_io_for_each_page(nand, type, start, req, iter) \ + for (nanddev_io_iter_init(nand, type, start, req, iter); \ !nanddev_io_iter_end(nand, iter); \ nanddev_io_iter_next_page(nand, iter)) -- cgit v1.2.3 From 7c4b1ab9f16732fb921b3f11cd127fa65f26ad5c Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 18 Aug 2020 14:52:45 +0300 Subject: IB/mlx5: Add DCT RoCE LAG support When DCT QPs work in RoCE LAG mode: 1. DCT creation is allowed only when it is supported 2. The "port" of a DCT QP is assigned in a round-robin way Link: https://lore.kernel.org/r/20200818115245.700581-3-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index de1ffb4804d6..aee25e4fb2cc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1430,7 +1430,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_bf_reg_size[0x5]; - u8 reserved_at_270[0x8]; + u8 reserved_at_270[0x6]; + u8 lag_dct[0x2]; u8 lag_tx_port_affinity[0x1]; u8 reserved_at_279[0x2]; u8 lag_master[0x1]; -- cgit v1.2.3 From 1c9c02bb22684f6949d2e7ddc0a3ff364fd5a6fc Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Apr 2020 14:50:37 -0500 Subject: mtd: lpddr: Fix bad logic in print_drs_error Update logic for broken test. Use a more common logging style. It appears the logic in this function is broken for the consecutive tests of if (prog_status & 0x3) ... else if (prog_status & 0x2) ... else (prog_status & 0x1) ... Likely the first test should be if ((prog_status & 0x3) == 0x3) Found by inspection of include files using printk. Fixes: eb3db27507f7 ("[MTD] LPDDR PFOW definition") Cc: stable@vger.kernel.org Reported-by: Joe Perches Signed-off-by: Gustavo A. R. Silva Acked-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/3fb0e29f5b601db8be2938a01d974b00c8788501.1588016644.git.gustavo@embeddedor.com --- include/linux/mtd/pfow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index 6166e7c60869..b8da6f8e854b 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr) if (!(dsr & DSR_AVAILABLE)) printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); - if (prog_status & 0x03) + if ((prog_status & 0x03) == 0x03) printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " "half with 41h command\n"); else if (prog_status & 0x02) -- cgit v1.2.3 From 518693abe6e3f57606ec18892e9135abbc04b361 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Apr 2020 14:54:13 -0500 Subject: mtd: lpddr: Replace printk with pr_notice pr_notice is preferred over printk. Also, coalesce formats as coalescing is part of coding-style: "never break user-visible strings such as printk messages" Suggested-by: Joe Perches Signed-off-by: Gustavo A. R. Silva Reviewed-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/ff48ce07ef208ba65b858f09279a3b36031d64d2.1588016644.git.gustavo@embeddedor.com --- include/linux/mtd/pfow.h | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index b8da6f8e854b..1c0f6113f230 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -127,31 +127,26 @@ static inline void print_drs_error(unsigned dsr) int prog_status = (dsr & DSR_RPS) >> 8; if (!(dsr & DSR_AVAILABLE)) - printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); + pr_notice("DSR.15: (0) Device not Available\n"); if ((prog_status & 0x03) == 0x03) - printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " - "half with 41h command\n"); + pr_notice("DSR.9,8: (11) Attempt to program invalid half with 41h command\n"); else if (prog_status & 0x02) - printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt " - "in region with Control Mode data\n"); + pr_notice("DSR.9,8: (10) Object Mode Program attempt in region with Control Mode data\n"); else if (prog_status & 0x01) - printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region " - "with Object Mode data\n"); + pr_notice("DSR.9,8: (01) Program attempt in region with Object Mode data\n"); if (!(dsr & DSR_READY_STATUS)) - printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n"); + pr_notice("DSR.7: (0) Device is Busy\n"); if (dsr & DSR_ESS) - printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n"); + pr_notice("DSR.6: (1) Erase Suspended\n"); if (dsr & DSR_ERASE_STATUS) - printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n"); + pr_notice("DSR.5: (1) Erase/Blank check error\n"); if (dsr & DSR_PROGRAM_STATUS) - printk(KERN_NOTICE"DSR.4: (1) Program Error\n"); + pr_notice("DSR.4: (1) Program Error\n"); if (dsr & DSR_VPPS) - printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation " - "aborted\n"); + pr_notice("DSR.3: (1) Vpp low detect, operation aborted\n"); if (dsr & DSR_PSS) - printk(KERN_NOTICE"DSR.2: (1) Program suspended\n"); + pr_notice("DSR.2: (1) Program suspended\n"); if (dsr & DSR_DPS) - printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt " - "on locked block\n"); + pr_notice("DSR.1: (1) Aborted Erase/Program attempt on locked block\n"); } #endif /* __LINUX_MTD_PFOW_H */ -- cgit v1.2.3 From 1a64026eda1642c81425e48550fd4bd3f73d0ab5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Apr 2020 14:56:08 -0500 Subject: mtd: lpddr: Move function print_drs_error to lpddr_cmds.c Function print_drs_error is only used in drivers/mtd/lpddr/lpddr_cmds.c so, better to move it there. Also, notice that there's no need for inline as the function is used once. Lastly, fix the following checkpatch warning: WARNING: Prefer 'unsigned int' to bare use of 'unsigned' +static void print_drs_error(unsigned dsr) Suggested-by: Joe Perches Signed-off-by: Gustavo A. R. Silva Reviewed-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/e0063cbd65f3b47be1db34efc494ea3047634d88.1588016644.git.gustavo@embeddedor.com --- include/linux/mtd/pfow.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index 1c0f6113f230..146413d4bdb7 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -121,32 +121,4 @@ static inline void send_pfow_command(struct map_info *map, map_write(map, CMD(LPDDR_START_EXECUTION), map->pfow_base + PFOW_COMMAND_EXECUTE); } - -static inline void print_drs_error(unsigned dsr) -{ - int prog_status = (dsr & DSR_RPS) >> 8; - - if (!(dsr & DSR_AVAILABLE)) - pr_notice("DSR.15: (0) Device not Available\n"); - if ((prog_status & 0x03) == 0x03) - pr_notice("DSR.9,8: (11) Attempt to program invalid half with 41h command\n"); - else if (prog_status & 0x02) - pr_notice("DSR.9,8: (10) Object Mode Program attempt in region with Control Mode data\n"); - else if (prog_status & 0x01) - pr_notice("DSR.9,8: (01) Program attempt in region with Object Mode data\n"); - if (!(dsr & DSR_READY_STATUS)) - pr_notice("DSR.7: (0) Device is Busy\n"); - if (dsr & DSR_ESS) - pr_notice("DSR.6: (1) Erase Suspended\n"); - if (dsr & DSR_ERASE_STATUS) - pr_notice("DSR.5: (1) Erase/Blank check error\n"); - if (dsr & DSR_PROGRAM_STATUS) - pr_notice("DSR.4: (1) Program Error\n"); - if (dsr & DSR_VPPS) - pr_notice("DSR.3: (1) Vpp low detect, operation aborted\n"); - if (dsr & DSR_PSS) - pr_notice("DSR.2: (1) Program suspended\n"); - if (dsr & DSR_DPS) - pr_notice("DSR.1: (1) Aborted Erase/Program attempt on locked block\n"); -} #endif /* __LINUX_MTD_PFOW_H */ -- cgit v1.2.3 From 2fa4e4b799e191530edbae4b96b85d4486e55053 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 27 Aug 2020 04:00:28 +0200 Subject: net: pcs: Move XPCS into new PCS subdirectory Create drivers/net/pcs and move the Synopsys DesignWare XPCS into the new directory. Move the header file into a subdirectory include/linux/pcs Start a naming convention of all PCS files use the prefix pcs-, and rename the XPCS files to fit. v2: Add include/linux/pcs v4: Fix include path in stmmac. Remove PCS_DEVICES to avoid new prompts Cc: Jose Abreu Reviewed-by: Florian Fainelli Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio-xpcs.h | 41 ----------------------------------------- include/linux/pcs/pcs-xpcs.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 41 deletions(-) delete mode 100644 include/linux/mdio-xpcs.h create mode 100644 include/linux/pcs/pcs-xpcs.h (limited to 'include/linux') diff --git a/include/linux/mdio-xpcs.h b/include/linux/mdio-xpcs.h deleted file mode 100644 index 9a841aa5982d..000000000000 --- a/include/linux/mdio-xpcs.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates. - * Synopsys DesignWare XPCS helpers - */ - -#ifndef __LINUX_MDIO_XPCS_H -#define __LINUX_MDIO_XPCS_H - -#include -#include - -struct mdio_xpcs_args { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); - struct mii_bus *bus; - int addr; -}; - -struct mdio_xpcs_ops { - int (*validate)(struct mdio_xpcs_args *xpcs, - unsigned long *supported, - struct phylink_link_state *state); - int (*config)(struct mdio_xpcs_args *xpcs, - const struct phylink_link_state *state); - int (*get_state)(struct mdio_xpcs_args *xpcs, - struct phylink_link_state *state); - int (*link_up)(struct mdio_xpcs_args *xpcs, int speed, - phy_interface_t interface); - int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface); -}; - -#if IS_ENABLED(CONFIG_MDIO_XPCS) -struct mdio_xpcs_ops *mdio_xpcs_get_ops(void); -#else -static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void) -{ - return NULL; -} -#endif - -#endif /* __LINUX_MDIO_XPCS_H */ diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h new file mode 100644 index 000000000000..351c1c9aedc5 --- /dev/null +++ b/include/linux/pcs/pcs-xpcs.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare XPCS helpers + */ + +#ifndef __LINUX_PCS_XPCS_H +#define __LINUX_PCS_XPCS_H + +#include +#include + +struct mdio_xpcs_args { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + struct mii_bus *bus; + int addr; +}; + +struct mdio_xpcs_ops { + int (*validate)(struct mdio_xpcs_args *xpcs, + unsigned long *supported, + struct phylink_link_state *state); + int (*config)(struct mdio_xpcs_args *xpcs, + const struct phylink_link_state *state); + int (*get_state)(struct mdio_xpcs_args *xpcs, + struct phylink_link_state *state); + int (*link_up)(struct mdio_xpcs_args *xpcs, int speed, + phy_interface_t interface); + int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface); +}; + +#if IS_ENABLED(CONFIG_PCS_XPCS) +struct mdio_xpcs_ops *mdio_xpcs_get_ops(void); +#else +static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void) +{ + return NULL; +} +#endif + +#endif /* __LINUX_PCS_XPCS_H */ -- cgit v1.2.3 From fcba68bd75bb1d42b3aec7f471d382a9e639a672 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 27 Aug 2020 04:00:29 +0200 Subject: net/phy/mdio-i2c: Move header file to include/linux/mdio In preparation for moving all MDIO drivers into drivers/net/mdio, move the mdio-i2c header file into include/linux/mdio so it can be used by both the MDIO driver and the SFP code which instantiates I2C MDIO busses. v2: Add include/linux/mdio Reviewed-by: Florian Fainelli Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio/mdio-i2c.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/mdio/mdio-i2c.h (limited to 'include/linux') diff --git a/include/linux/mdio/mdio-i2c.h b/include/linux/mdio/mdio-i2c.h new file mode 100644 index 000000000000..b1d27f7cd23f --- /dev/null +++ b/include/linux/mdio/mdio-i2c.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * MDIO I2C bridge + * + * Copyright (C) 2015 Russell King + */ +#ifndef MDIO_I2C_H +#define MDIO_I2C_H + +struct device; +struct i2c_adapter; +struct mii_bus; + +struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c); + +#endif -- cgit v1.2.3 From 232e15e1d7ddb191c28248cb681f4544c0ff1c54 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 27 Aug 2020 04:00:30 +0200 Subject: net: xgene: Move shared header file into include/linux This header file is currently included into the ethernet driver via a relative path into the PHY subsystem. This is bad practice, and causes issues for the upcoming move of the MDIO driver. Move the header file into include/linux to clean this up. v2: Move header to include/linux/mdio Reviewed-by: Florian Fainelli Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio/mdio-xgene.h | 130 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 include/linux/mdio/mdio-xgene.h (limited to 'include/linux') diff --git a/include/linux/mdio/mdio-xgene.h b/include/linux/mdio/mdio-xgene.h new file mode 100644 index 000000000000..8af93ada8b64 --- /dev/null +++ b/include/linux/mdio/mdio-xgene.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Applied Micro X-Gene SoC MDIO Driver + * + * Copyright (c) 2016, Applied Micro Circuits Corporation + * Author: Iyappan Subramanian + */ + +#ifndef __MDIO_XGENE_H__ +#define __MDIO_XGENE_H__ + +#define BLOCK_XG_MDIO_CSR_OFFSET 0x5000 +#define BLOCK_DIAG_CSR_OFFSET 0xd000 +#define XGENET_CONFIG_REG_ADDR 0x20 + +#define MAC_ADDR_REG_OFFSET 0x00 +#define MAC_COMMAND_REG_OFFSET 0x04 +#define MAC_WRITE_REG_OFFSET 0x08 +#define MAC_READ_REG_OFFSET 0x0c +#define MAC_COMMAND_DONE_REG_OFFSET 0x10 + +#define CLKEN_OFFSET 0x08 +#define SRST_OFFSET 0x00 + +#define MENET_CFG_MEM_RAM_SHUTDOWN_ADDR 0x70 +#define MENET_BLOCK_MEM_RDY_ADDR 0x74 + +#define MAC_CONFIG_1_ADDR 0x00 +#define MII_MGMT_COMMAND_ADDR 0x24 +#define MII_MGMT_ADDRESS_ADDR 0x28 +#define MII_MGMT_CONTROL_ADDR 0x2c +#define MII_MGMT_STATUS_ADDR 0x30 +#define MII_MGMT_INDICATORS_ADDR 0x34 +#define SOFT_RESET BIT(31) + +#define MII_MGMT_CONFIG_ADDR 0x20 +#define MII_MGMT_COMMAND_ADDR 0x24 +#define MII_MGMT_ADDRESS_ADDR 0x28 +#define MII_MGMT_CONTROL_ADDR 0x2c +#define MII_MGMT_STATUS_ADDR 0x30 +#define MII_MGMT_INDICATORS_ADDR 0x34 + +#define MIIM_COMMAND_ADDR 0x20 +#define MIIM_FIELD_ADDR 0x24 +#define MIIM_CONFIGURATION_ADDR 0x28 +#define MIIM_LINKFAILVECTOR_ADDR 0x2c +#define MIIM_INDICATOR_ADDR 0x30 +#define MIIMRD_FIELD_ADDR 0x34 + +#define MDIO_CSR_OFFSET 0x5000 + +#define REG_ADDR_POS 0 +#define REG_ADDR_LEN 5 +#define PHY_ADDR_POS 8 +#define PHY_ADDR_LEN 5 + +#define HSTMIIMWRDAT_POS 0 +#define HSTMIIMWRDAT_LEN 16 +#define HSTPHYADX_POS 23 +#define HSTPHYADX_LEN 5 +#define HSTREGADX_POS 18 +#define HSTREGADX_LEN 5 +#define HSTLDCMD BIT(3) +#define HSTMIIMCMD_POS 0 +#define HSTMIIMCMD_LEN 3 + +#define BUSY_MASK BIT(0) +#define READ_CYCLE_MASK BIT(0) + +enum xgene_enet_cmd { + XGENE_ENET_WR_CMD = BIT(31), + XGENE_ENET_RD_CMD = BIT(30) +}; + +enum { + MIIM_CMD_IDLE, + MIIM_CMD_LEGACY_WRITE, + MIIM_CMD_LEGACY_READ, +}; + +enum xgene_mdio_id { + XGENE_MDIO_RGMII = 1, + XGENE_MDIO_XFI +}; + +struct xgene_mdio_pdata { + struct clk *clk; + struct device *dev; + void __iomem *mac_csr_addr; + void __iomem *diag_csr_addr; + void __iomem *mdio_csr_addr; + struct mii_bus *mdio_bus; + int mdio_id; + spinlock_t mac_lock; /* mac lock */ +}; + +/* Set the specified value into a bit-field defined by its starting position + * and length within a single u64. + */ +static inline u64 xgene_enet_set_field_value(int pos, int len, u64 val) +{ + return (val & ((1ULL << len) - 1)) << pos; +} + +#define SET_VAL(field, val) \ + xgene_enet_set_field_value(field ## _POS, field ## _LEN, val) + +#define SET_BIT(field) \ + xgene_enet_set_field_value(field ## _POS, 1, 1) + +/* Get the value from a bit-field defined by its starting position + * and length within the specified u64. + */ +static inline u64 xgene_enet_get_field_value(int pos, int len, u64 src) +{ + return (src >> pos) & ((1ULL << len) - 1); +} + +#define GET_VAL(field, src) \ + xgene_enet_get_field_value(field ## _POS, field ## _LEN, src) + +#define GET_BIT(field, src) \ + xgene_enet_get_field_value(field ## _POS, 1, src) + +u32 xgene_mdio_rd_mac(struct xgene_mdio_pdata *pdata, u32 rd_addr); +void xgene_mdio_wr_mac(struct xgene_mdio_pdata *pdata, u32 wr_addr, u32 data); +int xgene_mdio_rgmii_read(struct mii_bus *bus, int phy_id, int reg); +int xgene_mdio_rgmii_write(struct mii_bus *bus, int phy_id, int reg, u16 data); +struct phy_device *xgene_enet_phy_register(struct mii_bus *bus, int phy_addr); + +#endif /* __MDIO_XGENE_H__ */ -- cgit v1.2.3 From 17529bcf0ae20f1ac6d7846762bf0c6ad91dbb7f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 27 Aug 2020 10:48:28 +0200 Subject: power: supply: gpio-charger: Convert to GPIO descriptors This converts the GPIO charger to use exclusively GPIO descriptors, moving the two remaining platforms passing global GPIO numbers over to using a GPIO descriptor table. Signed-off-by: Linus Walleij Cc: Robert Jarzmik Cc: Dmitry Eremin-Solenikov Signed-off-by: Sebastian Reichel --- include/linux/power/gpio-charger.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/gpio-charger.h b/include/linux/power/gpio-charger.h index 5a5a8de98181..c0b7657ac1df 100644 --- a/include/linux/power/gpio-charger.h +++ b/include/linux/power/gpio-charger.h @@ -13,18 +13,12 @@ * struct gpio_charger_platform_data - platform_data for gpio_charger devices * @name: Name for the chargers power_supply device * @type: Type of the charger - * @gpio: GPIO which is used to indicate the chargers status - * @gpio_active_low: Should be set to 1 if the GPIO is active low otherwise 0 * @supplied_to: Array of battery names to which this chargers supplies power * @num_supplicants: Number of entries in the supplied_to array */ struct gpio_charger_platform_data { const char *name; enum power_supply_type type; - - int gpio; - int gpio_active_low; - char **supplied_to; size_t num_supplicants; }; -- cgit v1.2.3 From dab741e0e02bd3c4f5e2e97be74b39df2523fc6e Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Thu, 27 Aug 2020 11:09:46 -0600 Subject: Add a "nosymfollow" mount option. For mounts that have the new "nosymfollow" option, don't follow symlinks when resolving paths. The new option is similar in spirit to the existing "nodev", "noexec", and "nosuid" options, as well as to the LOOKUP_NO_SYMLINKS resolve flag in the openat2(2) syscall. Various BSD variants have been supporting the "nosymfollow" mount option for a long time with equivalent implementations. Note that symlinks may still be created on file systems mounted with the "nosymfollow" option present. readlink() remains functional, so user space code that is aware of symlinks can still choose to follow them explicitly. Setting the "nosymfollow" mount option helps prevent privileged writers from modifying files unintentionally in case there is an unexpected link along the accessed path. The "nosymfollow" option is thus useful as a defensive measure for systems that need to deal with untrusted file systems in privileged contexts. More information on the history and motivation for this patch can be found here: https://sites.google.com/a/chromium.org/dev/chromium-os/chromiumos-design-docs/hardening-against-malicious-stateful-data#TOC-Restricting-symlink-traversal Signed-off-by: Mattias Nissler Signed-off-by: Ross Zwisler Reviewed-by: Aleksa Sarai Signed-off-by: Al Viro --- include/linux/mount.h | 3 ++- include/linux/statfs.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index de657bd211fa..aaf343b38671 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -30,6 +30,7 @@ struct fs_context; #define MNT_NODIRATIME 0x10 #define MNT_RELATIME 0x20 #define MNT_READONLY 0x40 /* does the user want this to be r/o? */ +#define MNT_NOSYMFOLLOW 0x80 #define MNT_SHRINKABLE 0x100 #define MNT_WRITE_HOLD 0x200 @@ -46,7 +47,7 @@ struct fs_context; #define MNT_SHARED_MASK (MNT_UNBINDABLE) #define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY) + | MNT_READONLY | MNT_NOSYMFOLLOW) #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ diff --git a/include/linux/statfs.h b/include/linux/statfs.h index 9bc69edb8f18..fac4356ea1bf 100644 --- a/include/linux/statfs.h +++ b/include/linux/statfs.h @@ -40,6 +40,7 @@ struct kstatfs { #define ST_NOATIME 0x0400 /* do not update access times */ #define ST_NODIRATIME 0x0800 /* do not update directory access times */ #define ST_RELATIME 0x1000 /* update atime relative to mtime/ctime */ +#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */ struct dentry; extern int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid); -- cgit v1.2.3 From 6bbe2a90a0bb4af8dd99c3565e907fe9b5e7fd88 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 17 Aug 2020 11:38:27 -0700 Subject: usb: typec: tcpm: During PR_SWAP, source caps should be sent only after tSwapSourceStart The patch addresses the compliance test failures while running TD.PD.CP.E3, TD.PD.CP.E4, TD.PD.CP.E5 of the "Deterministic PD Compliance MOI" test plan published in https://www.usb.org/usbc. For a product to be Type-C compliant, it's expected that these tests are run on usb.org certified Type-C compliance tester as mentioned in https://www.usb.org/usbc. The purpose of the tests TD.PD.CP.E3, TD.PD.CP.E4, TD.PD.CP.E5 is to verify the PR_SWAP response of the device. While doing so, the test asserts that Source Capabilities message is NOT received from the test device within tSwapSourceStart min (20 ms) from the time the last bit of GoodCRC corresponding to the RS_RDY message sent by the UUT was sent. If it does then the test fails. This is in line with the requirements from the USB Power Delivery Specification Revision 3.0, Version 1.2: "6.6.8.1 SwapSourceStartTimer The SwapSourceStartTimer Shall be used by the new Source, after a Power Role Swap or Fast Role Swap, to ensure that it does not send Source_Capabilities Message before the new Sink is ready to receive the Source_Capabilities Message. The new Source Shall Not send the Source_Capabilities Message earlier than tSwapSourceStart after the last bit of the EOP of GoodCRC Message sent in response to the PS_RDY Message sent by the new Source indicating that its power supply is ready." The patch makes sure that TCPM does not send the Source_Capabilities Message within tSwapSourceStart(20ms) by transitioning into SRC_STARTUP only after tSwapSourceStart(20ms). Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200817183828.1895015-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index b6c233e79bd4..1df895e4680b 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -473,6 +473,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_ERROR_RECOVERY 100 /* minimum 25 is insufficient */ #define PD_T_SRCSWAPSTDBY 625 /* Maximum of 650ms */ #define PD_T_NEWSRC 250 /* Maximum of 275ms */ +#define PD_T_SWAP_SRC_START 20 /* Minimum of 20ms */ #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ -- cgit v1.2.3 From aefc66afe42bcae01743c0b3c5addd089263801b Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 17 Aug 2020 11:38:28 -0700 Subject: usb: typec: pd: Fix formatting in pd.h header Replacing spaces with tabs for PD_T_* constants. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200817183828.1895015-2-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 1df895e4680b..f842e4589bd2 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -471,9 +471,10 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_VCONN_SOURCE_ON 100 #define PD_T_SINK_REQUEST 100 /* 100 ms minimum */ #define PD_T_ERROR_RECOVERY 100 /* minimum 25 is insufficient */ -#define PD_T_SRCSWAPSTDBY 625 /* Maximum of 650ms */ -#define PD_T_NEWSRC 250 /* Maximum of 275ms */ +#define PD_T_SRCSWAPSTDBY 625 /* Maximum of 650ms */ +#define PD_T_NEWSRC 250 /* Maximum of 275ms */ #define PD_T_SWAP_SRC_START 20 /* Minimum of 20ms */ +#define PD_T_BIST_CONT_MODE 50 /* 30 - 60 ms */ #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ @@ -484,5 +485,4 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_N_CAPS_COUNT (PD_T_NO_RESPONSE / PD_T_SEND_SOURCE_CAP) #define PD_N_HARD_RESET_COUNT 2 -#define PD_T_BIST_CONT_MODE 50 /* 30 - 60 ms */ #endif /* __LINUX_USB_PD_H */ -- cgit v1.2.3 From d3cd0071a89a085a2d578b6a9e31db10469501c6 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 27 Jul 2020 13:12:18 +0200 Subject: eeprom: at25: allow page sizes greater than 16 bit Storage technologies like FRAM have no "write pages", the whole chip can be written within one SPI transfer. For these chips, the page size can be set equal to the device size. Currently available devices are already bigger than 64 kiB. Signed-off-by: Christian Eggers Link: https://lore.kernel.org/r/20200727111218.26926-1-ceggers@arri.de Signed-off-by: Greg Kroah-Hartman --- include/linux/spi/eeprom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/eeprom.h b/include/linux/spi/eeprom.h index aceccf9c71fb..1cca3dd5a748 100644 --- a/include/linux/spi/eeprom.h +++ b/include/linux/spi/eeprom.h @@ -14,7 +14,7 @@ struct spi_eeprom { u32 byte_len; char name[10]; - u16 page_size; /* for writes */ + u32 page_size; /* for writes */ u16 flags; #define EE_ADDR1 0x0001 /* 8 bit addrs */ #define EE_ADDR2 0x0002 /* 16 bit addrs */ -- cgit v1.2.3 From 145fbd1e82e6654398ace321432e3a803b8a2be4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 28 Aug 2020 11:41:41 +0300 Subject: dmaengine: Remove unused define for dma_request_slave_channel_reason() No users left in the kernel, it can be removed. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200828084141.14902-1-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6fbd5c99e30c..011371b7f081 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1527,8 +1527,6 @@ static inline int dma_get_slave_caps(struct dma_chan *chan, } #endif -#define dma_request_slave_channel_reason(dev, name) dma_request_chan(dev, name) - static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) { struct dma_slave_caps caps; -- cgit v1.2.3 From 31b4b3bff2c217c61bb99db76f24c4886e74a1a5 Mon Sep 17 00:00:00 2001 From: Sebastian Fricke Date: Mon, 3 Aug 2020 07:43:45 +0200 Subject: include/linux/miscdevice.h - Fix typo/grammar Improve the clarity and grammar of descriptive comment on top of the minor number assignments. Fix a typo within 2 comments for macros. s/This helps in eleminating of boilerplate code. /This helps to eliminate boilerplate code./ Signed-off-by: Sebastian Fricke Link: https://lore.kernel.org/r/20200803054346.4285-1-sebastian.fricke.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index c7a93002a3c1..0676f18093f9 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -7,9 +7,9 @@ #include /* - * These allocations are managed by device@lanana.org. If you use an - * entry that is not in assigned your entry may well be moved and - * reassigned, or set dynamic if a fixed value is not justified. + * These allocations are managed by device@lanana.org. If you need + * an entry that is not assigned here, it can be moved and + * reassigned or dynamically set if a fixed value is not justified. */ #define PSMOUSE_MINOR 1 @@ -93,14 +93,14 @@ extern void misc_deregister(struct miscdevice *misc); /* * Helper macro for drivers that don't do anything special in the initcall. - * This helps in eleminating of boilerplate code. + * This helps to eliminate boilerplate code. */ #define builtin_misc_device(__misc_device) \ builtin_driver(__misc_device, misc_register) /* * Helper macro for drivers that don't do anything special in module init / exit - * call. This helps in eleminating of boilerplate code. + * call. This helps to eliminate boilerplate code. */ #define module_misc_device(__misc_device) \ module_driver(__misc_device, misc_register, misc_deregister) -- cgit v1.2.3 From cf5153e4338c4cca655a4a4eff6d54c6a0adcdb7 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Wed, 12 Aug 2020 02:27:19 +0200 Subject: media: gpu: host1x: mipi: Keep MIPI clock enabled and mutex locked till calibration done With the split of MIPI calibration into tegra_mipi_calibrate() and tegra_mipi_wait(), MIPI clock is not kept enabled and mutex is not locked till the calibration is done. So, this patch keeps MIPI clock enabled and mutex locked after triggering start of calibration till its done. To let calibration process go through its finite sequence codes before calibration logic waiting for pads idle state added wait time of 75usec to make sure it sees idle state to apply the results. This patch renames tegra_mipi_calibrate() as tegra_mipi_start_calibration() and tegra_mipi_wait() as tegra_mipi_finish_calibration() to be inline with their usage. Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/host1x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 20c885d0bddc..ce59a6a6a008 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -333,7 +333,7 @@ struct tegra_mipi_device *tegra_mipi_request(struct device *device, void tegra_mipi_free(struct tegra_mipi_device *device); int tegra_mipi_enable(struct tegra_mipi_device *device); int tegra_mipi_disable(struct tegra_mipi_device *device); -int tegra_mipi_calibrate(struct tegra_mipi_device *device); -int tegra_mipi_wait(struct tegra_mipi_device *device); +int tegra_mipi_start_calibration(struct tegra_mipi_device *device); +int tegra_mipi_finish_calibration(struct tegra_mipi_device *device); #endif -- cgit v1.2.3 From b6f3e21b928a8ae7959a0d79203b80bd70120768 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 26 Aug 2020 16:41:58 +0200 Subject: power: supply: smb347-charger: Drop pdata support There are no platforms using the pdata support, so let's drop it to simplify the driver. Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Signed-off-by: Sebastian Reichel --- include/linux/power/smb347-charger.h | 114 ----------------------------------- 1 file changed, 114 deletions(-) delete mode 100644 include/linux/power/smb347-charger.h (limited to 'include/linux') diff --git a/include/linux/power/smb347-charger.h b/include/linux/power/smb347-charger.h deleted file mode 100644 index e0b687a4d20c..000000000000 --- a/include/linux/power/smb347-charger.h +++ /dev/null @@ -1,114 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Summit Microelectronics SMB347 Battery Charger Driver - * - * Copyright (C) 2011, Intel Corporation - * - * Authors: Bruce E. Robertson - * Mika Westerberg - */ - -#ifndef SMB347_CHARGER_H -#define SMB347_CHARGER_H - -#include -#include - -enum { - /* use the default compensation method */ - SMB347_SOFT_TEMP_COMPENSATE_DEFAULT = -1, - - SMB347_SOFT_TEMP_COMPENSATE_NONE, - SMB347_SOFT_TEMP_COMPENSATE_CURRENT, - SMB347_SOFT_TEMP_COMPENSATE_VOLTAGE, -}; - -/* Use default factory programmed value for hard/soft temperature limit */ -#define SMB347_TEMP_USE_DEFAULT -273 - -/* - * Charging enable can be controlled by software (via i2c) by - * smb347-charger driver or by EN pin (active low/high). - */ -enum smb347_chg_enable { - SMB347_CHG_ENABLE_SW, - SMB347_CHG_ENABLE_PIN_ACTIVE_LOW, - SMB347_CHG_ENABLE_PIN_ACTIVE_HIGH, -}; - -/** - * struct smb347_charger_platform_data - platform data for SMB347 charger - * @battery_info: Information about the battery - * @max_charge_current: maximum current (in uA) the battery can be charged - * @max_charge_voltage: maximum voltage (in uV) the battery can be charged - * @pre_charge_current: current (in uA) to use in pre-charging phase - * @termination_current: current (in uA) used to determine when the - * charging cycle terminates - * @pre_to_fast_voltage: voltage (in uV) treshold used for transitioning to - * pre-charge to fast charge mode - * @mains_current_limit: maximum input current drawn from AC/DC input (in uA) - * @usb_hc_current_limit: maximum input high current (in uA) drawn from USB - * input - * @chip_temp_threshold: die temperature where device starts limiting charge - * current [%100 - %130] (in degree C) - * @soft_cold_temp_limit: soft cold temperature limit [%0 - %15] (in degree C), - * granularity is 5 deg C. - * @soft_hot_temp_limit: soft hot temperature limit [%40 - %55] (in degree C), - * granularity is 5 deg C. - * @hard_cold_temp_limit: hard cold temperature limit [%-5 - %10] (in degree C), - * granularity is 5 deg C. - * @hard_hot_temp_limit: hard hot temperature limit [%50 - %65] (in degree C), - * granularity is 5 deg C. - * @suspend_on_hard_temp_limit: suspend charging when hard limit is hit - * @soft_temp_limit_compensation: compensation method when soft temperature - * limit is hit - * @charge_current_compensation: current (in uA) for charging compensation - * current when temperature hits soft limits - * @use_mains: AC/DC input can be used - * @use_usb: USB input can be used - * @use_usb_otg: USB OTG output can be used (not implemented yet) - * @irq_gpio: GPIO number used for interrupts (%-1 if not used) - * @enable_control: how charging enable/disable is controlled - * (driver/pin controls) - * - * @use_main, @use_usb, and @use_usb_otg are means to enable/disable - * hardware support for these. This is useful when we want to have for - * example OTG charging controlled via OTG transceiver driver and not by - * the SMB347 hardware. - * - * Hard and soft temperature limit values are given as described in the - * device data sheet and assuming NTC beta value is %3750. Even if this is - * not the case, these values should be used. They can be mapped to the - * corresponding NTC beta values with the help of table %2 in the data - * sheet. So for example if NTC beta is %3375 and we want to program hard - * hot limit to be %53 deg C, @hard_hot_temp_limit should be set to %50. - * - * If zero value is given in any of the current and voltage values, the - * factory programmed default will be used. For soft/hard temperature - * values, pass in %SMB347_TEMP_USE_DEFAULT instead. - */ -struct smb347_charger_platform_data { - struct power_supply_info battery_info; - unsigned int max_charge_current; - unsigned int max_charge_voltage; - unsigned int pre_charge_current; - unsigned int termination_current; - unsigned int pre_to_fast_voltage; - unsigned int mains_current_limit; - unsigned int usb_hc_current_limit; - unsigned int chip_temp_threshold; - int soft_cold_temp_limit; - int soft_hot_temp_limit; - int hard_cold_temp_limit; - int hard_hot_temp_limit; - bool suspend_on_hard_temp_limit; - unsigned int soft_temp_limit_compensation; - unsigned int charge_current_compensation; - bool use_mains; - bool use_usb; - bool use_usb_otg; - int irq_gpio; - enum smb347_chg_enable enable_control; -}; - -#endif /* SMB347_CHARGER_H */ -- cgit v1.2.3 From f4d05259213ff1e91f767c91dcab455f68308fac Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 27 Aug 2020 18:18:06 -0700 Subject: bpf: Add map_meta_equal map ops Some properties of the inner map is used in the verification time. When an inner map is inserted to an outer map at runtime, bpf_map_meta_equal() is currently used to ensure those properties of the inserting inner map stays the same as the verification time. In particular, the current bpf_map_meta_equal() checks max_entries which turns out to be too restrictive for most of the maps which do not use max_entries during the verification time. It limits the use case that wants to replace a smaller inner map with a larger inner map. There are some maps do use max_entries during verification though. For example, the map_gen_lookup in array_map_ops uses the max_entries to generate the inline lookup code. To accommodate differences between maps, the map_meta_equal is added to bpf_map_ops. Each map-type can decide what to check when its map is used as an inner map during runtime. Also, some map types cannot be used as an inner map and they are currently black listed in bpf_map_meta_alloc() in map_in_map.c. It is not unusual that the new map types may not aware that such blacklist exists. This patch enforces an explicit opt-in and only allows a map to be used as an inner map if it has implemented the map_meta_equal ops. It is based on the discussion in [1]. All maps that support inner map has its map_meta_equal points to bpf_map_meta_equal in this patch. A later patch will relax the max_entries check for most maps. bpf_types.h counts 28 map types. This patch adds 23 ".map_meta_equal" by using coccinelle. -5 for BPF_MAP_TYPE_PROG_ARRAY BPF_MAP_TYPE_(PERCPU)_CGROUP_STORAGE BPF_MAP_TYPE_STRUCT_OPS BPF_MAP_TYPE_ARRAY_OF_MAPS BPF_MAP_TYPE_HASH_OF_MAPS The "if (inner_map->inner_map_meta)" check in bpf_map_meta_alloc() is moved such that the same error is returned. [1]: https://lore.kernel.org/bpf/20200522022342.899756-1-kafai@fb.com/ Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200828011806.1970400-1-kafai@fb.com --- include/linux/bpf.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a6131d95e31e..dbba82a80087 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -112,6 +112,19 @@ struct bpf_map_ops { void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap, void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); + + /* map_meta_equal must be implemented for maps that can be + * used as an inner map. It is a runtime check to ensure + * an inner map can be inserted to an outer map. + * + * Some properties of the inner map has been used during the + * verification time. When inserting an inner map at the runtime, + * map_meta_equal has to ensure the inserting map has the same + * properties that the verifier has used earlier. + */ + bool (*map_meta_equal)(const struct bpf_map *meta0, + const struct bpf_map *meta1); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@ -235,6 +248,9 @@ int map_check_no_btf(const struct bpf_map *map, const struct btf_type *key_type, const struct btf_type *value_type); +bool bpf_map_meta_equal(const struct bpf_map *meta0, + const struct bpf_map *meta1); + extern const struct bpf_map_ops bpf_map_offload_ops; /* function argument constraints */ -- cgit v1.2.3 From 316cdaa1158af17250397054f92bb339fbd8e282 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 26 Aug 2020 09:05:35 -0700 Subject: net: add option to not create fall-back tunnels in root-ns as well The sysctl that was added earlier by commit 79134e6ce2c ("net: do not create fallback tunnels for non-default namespaces") to create fall-back only in root-ns. This patch enhances that behavior to provide option not to create fallback tunnels in root-ns as well. Since modules that create fallback tunnels could be built-in and setting the sysctl value after booting is pointless, so added a kernel cmdline options to change this default. The default setting is preseved for backward compatibility. The kernel command line option of fb_tunnels=initns will set the sysctl value to 1 and will create fallback tunnels only in initns while kernel cmdline fb_tunnels=none will set the sysctl value to 2 and fallback tunnels are skipped in every netns. Signed-off-by: Mahesh Bandewar Cc: Eric Dumazet Cc: Maciej Zenczykowski Cc: Jian Yang Cc: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b8abe1d7aa0b..c0b512e6a02b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -640,10 +640,14 @@ struct netdev_queue { extern int sysctl_fb_tunnels_only_for_init_net; extern int sysctl_devconf_inherit_init_net; +/* + * sysctl_fb_tunnels_only_for_init_net == 0 : For all netns + * == 1 : For initns only + * == 2 : For none. + */ static inline bool net_has_fallback_tunnels(const struct net *net) { - return net == &init_net || - !IS_ENABLED(CONFIG_SYSCTL) || + return (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1) || !sysctl_fb_tunnels_only_for_init_net; } -- cgit v1.2.3 From 9584051f3cf3af181e577960956bb7c085879b67 Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 14 May 2020 16:04:27 -0700 Subject: power: supply: charger-manager: Remove cm_notify_event function cm_notify_event() was introduced to get an event associated with the battery status externally (ie in board files), but no one ever used it. Moreover it makes charger manager driver more complicated. Drop the function and all data related to it to simplify the driver. Signed-off-by: Jonghwa Lee Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jonathan Bakker Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index ae94dcebd936..3a98837684e3 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -31,16 +31,10 @@ enum polling_modes { CM_POLL_CHARGING_ONLY, }; -enum cm_event_types { - CM_EVENT_UNKNOWN = 0, - CM_EVENT_BATT_FULL, - CM_EVENT_BATT_IN, - CM_EVENT_BATT_OUT, - CM_EVENT_BATT_OVERHEAT, - CM_EVENT_BATT_COLD, - CM_EVENT_EXT_PWR_IN_OUT, - CM_EVENT_CHG_START_STOP, - CM_EVENT_OTHERS, +enum cm_batt_temp { + CM_BATT_OK = 0, + CM_BATT_OVERHEAT, + CM_BATT_COLD, }; /** @@ -131,11 +125,10 @@ struct charger_regulator { * @psy_name: the name of power-supply-class for charger manager * @polling_mode: * Determine which polling mode will be used - * @fullbatt_vchkdrop_ms: * @fullbatt_vchkdrop_uV: * Check voltage drop after the battery is fully charged. - * If it has dropped more than fullbatt_vchkdrop_uV after - * fullbatt_vchkdrop_ms, CM will restart charging. + * If it has dropped more than fullbatt_vchkdrop_uV + * CM will restart charging. * @fullbatt_uV: voltage in microvolt * If VBATT >= fullbatt_uV, it is assumed to be full. * @fullbatt_soc: state of Charge in % @@ -172,7 +165,6 @@ struct charger_desc { enum polling_modes polling_mode; unsigned int polling_interval_ms; - unsigned int fullbatt_vchkdrop_ms; unsigned int fullbatt_vchkdrop_uV; unsigned int fullbatt_uV; unsigned int fullbatt_soc; @@ -211,9 +203,6 @@ struct charger_desc { * @charger_stat: array of power_supply for chargers * @tzd_batt : thermal zone device for battery * @charger_enabled: the state of charger - * @fullbatt_vchk_jiffies_at: - * jiffies at the time full battery check will occur. - * @fullbatt_vchk_work: work queue for full battery check * @emergency_stop: * When setting true, stop charging * @psy_name_buf: the name of power-supply-class for charger manager @@ -235,9 +224,6 @@ struct charger_manager { #endif bool charger_enabled; - unsigned long fullbatt_vchk_jiffies_at; - struct delayed_work fullbatt_vchk_work; - int emergency_stop; char psy_name_buf[PSY_NAME_MAX + 1]; @@ -248,11 +234,4 @@ struct charger_manager { u64 charging_end_time; }; -#if IS_ENABLED(CONFIG_CHARGER_MANAGER) -extern void cm_notify_event(struct power_supply *psy, - enum cm_event_types type, char *msg); -#else -static inline void cm_notify_event(struct power_supply *psy, - enum cm_event_types type, char *msg) { } -#endif #endif /* _CHARGER_MANAGER_H */ -- cgit v1.2.3 From e132fc6bb89bd307cfcdb8ba24afcd1985261485 Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 14 May 2020 16:04:31 -0700 Subject: power: supply: charger-manager: Make decisions focussed on battery status cm_monitor(), where charging management starts, checks various charging condition sequentially to decide next charging operation. However, as it follows sequential process, cascaded if statements, it does some jobs which have already done in the previous stage. This results in a delay in decision making. Moreover, starting point of charging is spread all around which makes maintain code and debugging difficult. Both of the problems mentioned above become clean if it manages battery charging focusing on battery status not following sequential condition checking. Now, cm_monitor() moves battery state diagram and does the optimal operation for current state. As a result, it reduces whole monitoring time almost in half. Signed-off-by: Jonghwa Lee Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jonathan Bakker Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index 3a98837684e3..c127dbe31e49 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -213,6 +213,7 @@ struct charger_desc { * saved status of battery before entering suspend-to-RAM * @charging_start_time: saved start time of enabling charging * @charging_end_time: saved end time of disabling charging + * @battery_status: Current battery status */ struct charger_manager { struct list_head entry; @@ -232,6 +233,8 @@ struct charger_manager { u64 charging_start_time; u64 charging_end_time; + + int battery_status; }; #endif /* _CHARGER_MANAGER_H */ -- cgit v1.2.3 From c1f73028f75df43689feda4bc70573b7d18a618e Mon Sep 17 00:00:00 2001 From: Jonathan Bakker Date: Thu, 14 May 2020 16:04:33 -0700 Subject: power: supply: charger-manager: Update extcon functions In commit 830ae442202e ("extcon: Remove the deprecated extcon functions") the function extcon_register_interest became a no-op returning an error, leading to non-functional behaviour in charger-manager. Additionally, a translation table is needed between the text representation of the extcon cable names and their IDs is needed. In order to retain DT compatibility, TA and CHARGE-DOWNSTREAM are added as they were present up until commit 11eecf910bd8 ("extcon: Modify the id and name of external connector") Signed-off-by: Jonathan Bakker Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index c127dbe31e49..45e228b353ea 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -40,7 +40,7 @@ enum cm_batt_temp { /** * struct charger_cable * @extcon_name: the name of extcon device. - * @name: the name of charger cable(external connector). + * @name: the name of the cable connector * @extcon_dev: the extcon device. * @wq: the workqueue to control charger according to the state of * charger cable. If charger cable is attached, enable charger. @@ -56,9 +56,10 @@ enum cm_batt_temp { struct charger_cable { const char *extcon_name; const char *name; + struct extcon_dev *extcon_dev; + u64 extcon_type; /* The charger-manager use Extcon framework */ - struct extcon_specific_cable_nb extcon_dev; struct work_struct wq; struct notifier_block nb; -- cgit v1.2.3 From 4afc41dfa5a716e9e7a90c22972583f337c0bcbf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:43 +0200 Subject: netfilter: conntrack: remove ignore stats This counter increments when nf_conntrack_in sees a packet that already has a conntrack attached or when the packet is marked as UNTRACKED. Neither is an error. The former is normal for loopback traffic. The second happens for certain ICMPv6 packets or when nftables/ip(6)tables rules are in place. In case someone needs to count UNTRACKED packets, or packets that are marked as untracked before conntrack_in this can be done with both nftables and ip(6)tables rules. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 1db83c931d9c..96b90d7e361f 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -8,7 +8,6 @@ struct ip_conntrack_stat { unsigned int found; unsigned int invalid; - unsigned int ignore; unsigned int insert; unsigned int insert_failed; unsigned int drop; -- cgit v1.2.3 From bc92470413f3af152db0d8f90ef3eb13f8cc417a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:44 +0200 Subject: netfilter: conntrack: add clash resolution stat counter There is a misconception about what "insert_failed" means. We increment this even when a clash got resolved, so it might not indicate a problem. Add a dedicated counter for clash resolution and only increment insert_failed if a clash cannot be resolved. For the old /proc interface, export this in place of an older stat that got removed a while back. For ctnetlink, export this with a new attribute. Also correct an outdated comment that implies we add a duplicate tuple -- we only add the (unique) reply direction. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 96b90d7e361f..0c7d8d1e945d 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -10,6 +10,7 @@ struct ip_conntrack_stat { unsigned int invalid; unsigned int insert; unsigned int insert_failed; + unsigned int clash_resolve; unsigned int drop; unsigned int early_drop; unsigned int error; -- cgit v1.2.3 From 1e6c62a8821557720a9b2ea9617359b264f2f67c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:11 -0700 Subject: bpf: Introduce sleepable BPF programs Introduce sleepable BPF programs that can request such property for themselves via BPF_F_SLEEPABLE flag at program load time. In such case they will be able to use helpers like bpf_copy_from_user() that might sleep. At present only fentry/fexit/fmod_ret and lsm programs can request to be sleepable and only when they are attached to kernel functions that are known to allow sleeping. The non-sleepable programs are relying on implicit rcu_read_lock() and migrate_disable() to protect life time of programs, maps that they use and per-cpu kernel structures used to pass info between bpf programs and the kernel. The sleepable programs cannot be enclosed into rcu_read_lock(). migrate_disable() maps to preempt_disable() in non-RT kernels, so the progs should not be enclosed in migrate_disable() as well. Therefore rcu_read_lock_trace is used to protect the life time of sleepable progs. There are many networking and tracing program types. In many cases the 'struct bpf_prog *' pointer itself is rcu protected within some other kernel data structure and the kernel code is using rcu_dereference() to load that program pointer and call BPF_PROG_RUN() on it. All these cases are not touched. Instead sleepable bpf programs are allowed with bpf trampoline only. The program pointers are hard-coded into generated assembly of bpf trampoline and synchronize_rcu_tasks_trace() is used to protect the life time of the program. The same trampoline can hold both sleepable and non-sleepable progs. When rcu_read_lock_trace is held it means that some sleepable bpf program is running from bpf trampoline. Those programs can use bpf arrays and preallocated hash/lru maps. These map types are waiting on programs to complete via synchronize_rcu_tasks_trace(); Updates to trampoline now has to do synchronize_rcu_tasks_trace() and synchronize_rcu_tasks() to wait for sleepable progs to finish and for trampoline assembly to finish. This is the first step of introducing sleepable progs. Eventually dynamically allocated hash maps can be allowed and networking program types can become sleepable too. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-3-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dbba82a80087..4dd7e927621d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -539,6 +539,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); +void notrace __bpf_prog_enter_sleepable(void); +void notrace __bpf_prog_exit_sleepable(void); struct bpf_ksym { unsigned long start; @@ -734,6 +736,7 @@ struct bpf_prog_aux { bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; + bool sleepable; enum bpf_tramp_prog_type trampoline_prog_type; struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; -- cgit v1.2.3 From 07be4c4a3e7a0db148e44b16c5190e753d1c8569 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:12 -0700 Subject: bpf: Add bpf_copy_from_user() helper. Sleepable BPF programs can now use copy_from_user() to access user memory. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-4-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4dd7e927621d..c6d9f2c444f4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1784,6 +1784,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; +extern const struct bpf_func_proto bpf_copy_from_user_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From 9667305c6374df8672be46bc496f52f040999531 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 31 Aug 2020 08:51:55 -0700 Subject: bpf: Fix build without BPF_SYSCALL, but with BPF_JIT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_BPF_SYSCALL is not set, but CONFIG_BPF_JIT=y the kernel build fails: In file included from ../kernel/bpf/trampoline.c:11: ../kernel/bpf/trampoline.c: In function ‘bpf_trampoline_update’: ../kernel/bpf/trampoline.c:220:39: error: ‘call_rcu_tasks_trace’ undeclared ../kernel/bpf/trampoline.c: In function ‘__bpf_prog_enter_sleepable’: ../kernel/bpf/trampoline.c:411:2: error: implicit declaration of function ‘rcu_read_lock_trace’ ../kernel/bpf/trampoline.c: In function ‘__bpf_prog_exit_sleepable’: ../kernel/bpf/trampoline.c:416:2: error: implicit declaration of function ‘rcu_read_unlock_trace’ This is due to: obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_JIT) += dispatcher.o There is a number of functions that arch/x86/net/bpf_jit_comp.c is using from these two files, but none of them will be used when only cBPF is on (which is the case for BPF_SYSCALL=n BPF_JIT=y). Add rcu_trace functions to rcupdate_trace.h. The JITed code won't execute them and BPF trampoline logic won't be used without BPF_SYSCALL. Fixes: 1e6c62a88215 ("bpf: Introduce sleepable BPF programs") Reported-by: kernel test robot Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Paul E. McKenney Link: https://lore.kernel.org/bpf/20200831155155.62754-1-alexei.starovoitov@gmail.com --- include/linux/rcupdate_trace.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index d9015aac78c6..aaaac8ac927c 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -82,7 +82,14 @@ static inline void rcu_read_unlock_trace(void) void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); void synchronize_rcu_tasks_trace(void); void rcu_barrier_tasks_trace(void); - +#else +/* + * The BPF JIT forms these addresses even when it doesn't call these + * functions, so provide definitions that result in runtime errors. + */ +static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) { BUG(); } +static inline void rcu_read_lock_trace(void) { BUG(); } +static inline void rcu_read_unlock_trace(void) { BUG(); } #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ #endif /* __LINUX_RCUPDATE_TRACE_H */ -- cgit v1.2.3 From 1742b3d528690ae7773cf7bf2f01a90ee1de2fe0 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 28 Aug 2020 10:26:15 +0200 Subject: xsk: i40e: ice: ixgbe: mlx5: Pass buffer pool to driver instead of umem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the explicit umem reference passed to the driver in AF_XDP zero-copy mode with the buffer pool instead. This in preparation for extending the functionality of the zero-copy mode so that umems can be shared between queues on the same netdev and also between netdevs. In this commit, only an umem reference has been added to the buffer pool struct. But later commits will add other entities to it. These are going to be entities that are different between different queue ids and netdevs even though the umem is shared between them. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1598603189-32145-2-git-send-email-magnus.karlsson@intel.com --- include/linux/netdevice.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0e303f6603f..d088dd866825 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -618,7 +618,7 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; #ifdef CONFIG_XDP_SOCKETS - struct xdp_umem *umem; + struct xsk_buff_pool *pool; #endif /* * write-mostly part @@ -751,7 +751,7 @@ struct netdev_rx_queue { struct net_device *dev; struct xdp_rxq_info xdp_rxq; #ifdef CONFIG_XDP_SOCKETS - struct xdp_umem *umem; + struct xsk_buff_pool *pool; #endif } ____cacheline_aligned_in_smp; @@ -879,7 +879,7 @@ enum bpf_netdev_command { /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, - XDP_SETUP_XSK_UMEM, + XDP_SETUP_XSK_POOL, }; struct bpf_prog_offload_ops; @@ -913,9 +913,9 @@ struct netdev_bpf { struct { struct bpf_offloaded_map *offmap; }; - /* XDP_SETUP_XSK_UMEM */ + /* XDP_SETUP_XSK_POOL */ struct { - struct xdp_umem *umem; + struct xsk_buff_pool *pool; u16 queue_id; } xsk; }; -- cgit v1.2.3 From afd6220999d4932616322f8a893371f8d0567a2a Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sun, 30 Aug 2020 11:33:58 +0300 Subject: net: phylink: add helper function to decode USXGMII word With the new addition of the USXGMII link partner ability constants we can now introduce a phylink helper that decodes the USXGMII word and populates the appropriate fields in the phylink_link_state structure based on them. Signed-off-by: Ioana Ciornei Reviewed-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index c36fb41a7d90..d81a714cfbbd 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -490,4 +490,7 @@ void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, struct phylink_link_state *state); + +void phylink_decode_usxgmii_word(struct phylink_link_state *state, + uint16_t lpa); #endif -- cgit v1.2.3 From 2dab432c5ae4f9c8eab3132ac0e9facf498ead92 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sun, 30 Aug 2020 11:34:00 +0300 Subject: net: mdiobus: add clause 45 mdiobus write accessor Add the locked variant of the clause 45 mdiobus write accessor - mdiobus_c45_write(). Signed-off-by: Ioana Ciornei Reviewed-by: Russell King Signed-off-by: David S. Miller --- include/linux/mdio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 898cbf00332a..3a88b699b758 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -358,6 +358,12 @@ static inline int mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, return mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum)); } +static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, + u16 regnum, u16 val) +{ + return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val); +} + int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); -- cgit v1.2.3 From 0da4c3d393e40e41e3c6b9f1cebaa498512c2abb Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sun, 30 Aug 2020 11:34:01 +0300 Subject: net: phy: add Lynx PCS module Add a Lynx PCS module which exposes the necessary operations to drive the PCS using phylink. The majority of the code is extracted from the Felix DSA driver, which will be also modified in a later patch, and exposed as a separate module for code reusability purposes. As such, this aims at feature and bug parity with the existing Felix DSA driver, and thus USXGMII, SGMII, QSGMII and 2500Base-X (only w/o in-band AN) are supported by the Lynx PCS module since these were also supported by Felix. The module can only be enabled by the drivers in need and not user selectable. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- include/linux/pcs-lynx.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/pcs-lynx.h (limited to 'include/linux') diff --git a/include/linux/pcs-lynx.h b/include/linux/pcs-lynx.h new file mode 100644 index 000000000000..a6440d6ebe95 --- /dev/null +++ b/include/linux/pcs-lynx.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2020 NXP + * Lynx PCS helpers + */ + +#ifndef __LINUX_PCS_LYNX_H +#define __LINUX_PCS_LYNX_H + +#include +#include + +struct lynx_pcs { + struct phylink_pcs pcs; + struct mdio_device *mdio; +}; + +struct lynx_pcs *lynx_pcs_create(struct mdio_device *mdio); + +void lynx_pcs_destroy(struct lynx_pcs *pcs); + +#endif /* __LINUX_PCS_LYNX_H */ -- cgit v1.2.3 From 3f7d820bad6cace3ecb859bf3fa5bc56fe8bb1c6 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 31 Aug 2020 02:26:10 -0400 Subject: net: ipv6: remove unused arg exact_dif in compute_score The arg exact_dif is not used anymore, remove it. inet6_exact_dif_match() is no longer needed after the above is removed, remove it too. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- include/linux/ipv6.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bac8f4fffbd6..dda61d150a13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -177,17 +177,6 @@ static inline int inet6_sdif(const struct sk_buff *skb) return 0; } -/* can not be used in TCP layer after tcp_v6_fill_cb */ -static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) -{ -#if defined(CONFIG_NET_L3_MASTER_DEV) - if (!net->ipv4.sysctl_tcp_l3mdev_accept && - skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) - return true; -#endif - return false; -} - struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; }; -- cgit v1.2.3 From b7176c261cdbced87bed9562577333150ed05b01 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 24 Aug 2020 11:03:07 +1200 Subject: dma-contiguous: provide the ability to reserve per-numa CMA Right now, drivers like ARM SMMU are using dma_alloc_coherent() to get coherent DMA buffers to save their command queues and page tables. As there is only one default CMA in the whole system, SMMUs on nodes other than node0 will get remote memory. This leads to significant latency. This patch provides per-numa CMA so that drivers like SMMU can get local memory. Tests show localizing CMA can decrease dma_unmap latency much. For instance, before this patch, SMMU on node2 has to wait for more than 560ns for the completion of CMD_SYNC in an empty command queue; with this patch, it needs 240ns only. A positive side effect of this patch would be improving performance even further for those users who are worried about performance more than DMA security and use iommu.passthrough=1 to skip IOMMU. With local CMA, all drivers can get local coherent DMA buffers. Also, this patch changes the default CONFIG_CMA_AREAS to 19 in NUMA. As 1+CONFIG_CMA_AREAS should be quite enough for most servers on the market even they enable both hugetlb_cma and pernuma_cma. 2 numa nodes: 2(hugetlb) + 2(pernuma) + 1(default global cma) = 5 4 numa nodes: 4(hugetlb) + 4(pernuma) + 1(default global cma) = 9 8 numa nodes: 8(hugetlb) + 8(pernuma) + 1(default global cma) = 17 Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 03f8e98e3bcc..fe55e004f1f4 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -171,6 +171,12 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, #endif +#ifdef CONFIG_DMA_PERNUMA_CMA +void dma_pernuma_cma_reserve(void); +#else +static inline void dma_pernuma_cma_reserve(void) { } +#endif + #endif #endif -- cgit v1.2.3 From 2281f797f5524abb8fff66bf8540b4f4687332a2 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 24 Aug 2020 11:03:09 +1200 Subject: mm: cma: use CMA_MAX_NAME to define the length of cma name array CMA_MAX_NAME should be visible to CMA's users as they might need it to set the name of CMA areas and avoid hardcoding the size locally. So this patch moves CMA_MAX_NAME from local header file to include/linux header file and removes the hardcode in both hugetlb.c and contiguous.c. Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- include/linux/cma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 6ff79fefd01f..217999c8a762 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -18,6 +18,8 @@ #endif +#define CMA_MAX_NAME 64 + struct cma; extern unsigned long totalcma_pages; -- cgit v1.2.3 From 70d932985757fbe978024db313001218e9f8fe5c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:36 +0200 Subject: notifier: Fix broken error handling pattern The current notifiers have the following error handling pattern all over the place: int err, nr; err = __foo_notifier_call_chain(&chain, val_up, v, -1, &nr); if (err & NOTIFIER_STOP_MASK) __foo_notifier_call_chain(&chain, val_down, v, nr-1, NULL) And aside from the endless repetition thereof, it is broken. Consider blocking notifiers; both calls take and drop the rwsem, this means that the notifier list can change in between the two calls, making @nr meaningless. Fix this by replacing all the __foo_notifier_call_chain() functions with foo_notifier_call_chain_robust() that embeds the above pattern, but ensures it is inside a single lock region. Note: I switched atomic_notifier_call_chain_robust() to use the spinlock, since RCU cannot provide the guarantee required for the recovery. Note: software_resume() error handling was broken afaict. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20200818135804.325626653@infradead.org --- include/linux/notifier.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 018947611483..2fb373a5c1ed 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -161,20 +161,19 @@ extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v); -extern int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v); -extern int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v); -extern int __raw_notifier_call_chain(struct raw_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v); -extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v, int nr_to_call, int *nr_calls); + +extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); +extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); +extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, + unsigned long val_up, unsigned long val_down, void *v); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ -- cgit v1.2.3 From 563a02b0c9704f69c0364befedd451f57fe88092 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:40 +0200 Subject: compiler.h: Make __ADDRESSABLE() symbol truly unique The __ADDRESSABLE() macro uses the __LINE__ macro to create a temporary symbol which has a unique name. However, if the macro is used multiple times from within another macro, the line number will always be the same, resulting in duplicate symbols. Make the temporary symbols truly unique by using __UNIQUE_ID instead of __LINE__. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200818135804.564436253@infradead.org --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6810d80acb0b..92ef163a7479 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -207,7 +207,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __ADDRESSABLE(sym) \ static void * __section(.discard.addressable) __used \ - __PASTE(__addressable_##sym, __LINE__) = (void *)&sym; + __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; /** * offset_to_ptr - convert a relative memory offset to an absolute pointer -- cgit v1.2.3 From 115284d89a436e9b66da0c6c4f6efded806874b2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:41 +0200 Subject: static_call: Add basic static call infrastructure Static calls are a replacement for global function pointers. They use code patching to allow direct calls to be used instead of indirect calls. They give the flexibility of function pointers, but with improved performance. This is especially important for cases where retpolines would otherwise be used, as retpolines can significantly impact performance. The concept and code are an extension of previous work done by Ard Biesheuvel and Steven Rostedt: https://lkml.kernel.org/r/20181005081333.15018-1-ard.biesheuvel@linaro.org https://lkml.kernel.org/r/20181006015110.653946300@goodmis.org There are two implementations, depending on arch support: 1) out-of-line: patched trampolines (CONFIG_HAVE_STATIC_CALL) 2) basic function pointers For more details, see the comments in include/linux/static_call.h. [peterz: simplified interface] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.623259796@infradead.org --- include/linux/static_call.h | 156 ++++++++++++++++++++++++++++++++++++++ include/linux/static_call_types.h | 15 ++++ 2 files changed, 171 insertions(+) create mode 100644 include/linux/static_call.h create mode 100644 include/linux/static_call_types.h (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h new file mode 100644 index 000000000000..d8892dff2e91 --- /dev/null +++ b/include/linux/static_call.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_STATIC_CALL_H +#define _LINUX_STATIC_CALL_H + +/* + * Static call support + * + * Static calls use code patching to hard-code function pointers into direct + * branch instructions. They give the flexibility of function pointers, but + * with improved performance. This is especially important for cases where + * retpolines would otherwise be used, as retpolines can significantly impact + * performance. + * + * + * API overview: + * + * DECLARE_STATIC_CALL(name, func); + * DEFINE_STATIC_CALL(name, func); + * static_call(name)(args...); + * static_call_update(name, func); + * + * Usage example: + * + * # Start with the following functions (with identical prototypes): + * int func_a(int arg1, int arg2); + * int func_b(int arg1, int arg2); + * + * # Define a 'my_name' reference, associated with func_a() by default + * DEFINE_STATIC_CALL(my_name, func_a); + * + * # Call func_a() + * static_call(my_name)(arg1, arg2); + * + * # Update 'my_name' to point to func_b() + * static_call_update(my_name, &func_b); + * + * # Call func_b() + * static_call(my_name)(arg1, arg2); + * + * + * Implementation details: + * + * This requires some arch-specific code (CONFIG_HAVE_STATIC_CALL). + * Otherwise basic indirect calls are used (with function pointers). + * + * Each static_call() site calls into a trampoline associated with the name. + * The trampoline has a direct branch to the default function. Updates to a + * name will modify the trampoline's branch destination. + * + * If the arch has CONFIG_HAVE_STATIC_CALL_INLINE, then the call sites + * themselves will be patched at runtime to call the functions directly, + * rather than calling through the trampoline. This requires objtool or a + * compiler plugin to detect all the static_call() sites and annotate them + * in the .static_call_sites section. + */ + +#include +#include +#include + +#ifdef CONFIG_HAVE_STATIC_CALL +#include + +/* + * Either @site or @tramp can be NULL. + */ +extern void arch_static_call_transform(void *site, void *tramp, void *func); + +#define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name) + +/* + * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from + * the symbol table so that objtool can reference it when it generates the + * .static_call_sites section. + */ +#define __static_call(name) \ +({ \ + __ADDRESSABLE(STATIC_CALL_KEY(name)); \ + &STATIC_CALL_TRAMP(name); \ +}) + +#else +#define STATIC_CALL_TRAMP_ADDR(name) NULL +#endif + + +#define DECLARE_STATIC_CALL(name, func) \ + extern struct static_call_key STATIC_CALL_KEY(name); \ + extern typeof(func) STATIC_CALL_TRAMP(name); + +#define static_call_update(name, func) \ +({ \ + BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name))); \ + __static_call_update(&STATIC_CALL_KEY(name), \ + STATIC_CALL_TRAMP_ADDR(name), func); \ +}) + +#if defined(CONFIG_HAVE_STATIC_CALL) + +struct static_call_key { + void *func; +}; + +#define DEFINE_STATIC_CALL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = _func, \ + }; \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) + +#define static_call(name) __static_call(name) + +static inline +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + cpus_read_lock(); + WRITE_ONCE(key->func, func); + arch_static_call_transform(NULL, tramp, func); + cpus_read_unlock(); +} + +#define EXPORT_STATIC_CALL(name) \ + EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) + +#define EXPORT_STATIC_CALL_GPL(name) \ + EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) + +#else /* Generic implementation */ + +struct static_call_key { + void *func; +}; + +#define DEFINE_STATIC_CALL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = _func, \ + } + +#define static_call(name) \ + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) + +static inline +void __static_call_update(struct static_call_key *key, void *tramp, void *func) +{ + WRITE_ONCE(key->func, func); +} + +#define EXPORT_STATIC_CALL(name) EXPORT_SYMBOL(STATIC_CALL_KEY(name)) +#define EXPORT_STATIC_CALL_GPL(name) EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)) + +#endif /* CONFIG_HAVE_STATIC_CALL */ + +#endif /* _LINUX_STATIC_CALL_H */ diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h new file mode 100644 index 000000000000..5ed249dc47d3 --- /dev/null +++ b/include/linux/static_call_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _STATIC_CALL_TYPES_H +#define _STATIC_CALL_TYPES_H + +#include + +#define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) + +#define STATIC_CALL_TRAMP_PREFIX __SCT__ +#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) +#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) + +#endif /* _STATIC_CALL_TYPES_H */ -- cgit v1.2.3 From 9183c3f9ed710a8edf1a61e8a96d497258d26e08 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 18 Aug 2020 15:57:42 +0200 Subject: static_call: Add inline static call infrastructure Add infrastructure for an arch-specific CONFIG_HAVE_STATIC_CALL_INLINE option, which is a faster version of CONFIG_HAVE_STATIC_CALL. At runtime, the static call sites are patched directly, rather than using the out-of-line trampolines. Compared to out-of-line static calls, the performance benefits are more modest, but still measurable. Steven Rostedt did some tracepoint measurements: https://lkml.kernel.org/r/20181126155405.72b4f718@gandalf.local.home This code is heavily inspired by the jump label code (aka "static jumps"), as some of the concepts are very similar. For more details, see the comments in include/linux/static_call.h. [peterz: simplified interface; merged trampolines] Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135804.684334440@infradead.org --- include/linux/module.h | 5 +++++ include/linux/static_call.h | 36 +++++++++++++++++++++++++++++++++++- include/linux/static_call_types.h | 13 +++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index e30ed5fa33a7..a29187f7c360 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -498,6 +499,10 @@ struct module { unsigned long *kprobe_blacklist; unsigned int num_kprobe_blacklist; #endif +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + int num_static_call_sites; + struct static_call_site *static_call_sites; +#endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ diff --git a/include/linux/static_call.h b/include/linux/static_call.h index d8892dff2e91..0d7f9efaa3b2 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -95,7 +95,41 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func); STATIC_CALL_TRAMP_ADDR(name), func); \ }) -#if defined(CONFIG_HAVE_STATIC_CALL) +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + +struct static_call_mod { + struct static_call_mod *next; + struct module *mod; /* for vmlinux, mod == NULL */ + struct static_call_site *sites; +}; + +struct static_call_key { + void *func; + struct static_call_mod *mods; +}; + +extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); +extern int static_call_mod_init(struct module *mod); + +#define DEFINE_STATIC_CALL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = _func, \ + .mods = NULL, \ + }; \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) + +#define static_call(name) __static_call(name) + +#define EXPORT_STATIC_CALL(name) \ + EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) + +#define EXPORT_STATIC_CALL_GPL(name) \ + EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \ + EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) + +#elif defined(CONFIG_HAVE_STATIC_CALL) struct static_call_key { void *func; diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 5ed249dc47d3..408d345d83e1 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -2,14 +2,27 @@ #ifndef _STATIC_CALL_TYPES_H #define _STATIC_CALL_TYPES_H +#include #include #define STATIC_CALL_KEY_PREFIX __SCK__ +#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) +#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) #define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) #define STATIC_CALL_TRAMP_PREFIX __SCT__ #define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) +#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1) #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * The static call site table needs to be created by external tooling (objtool + * or a compiler plugin). + */ +struct static_call_site { + s32 addr; + s32 key; +}; + #endif /* _STATIC_CALL_TYPES_H */ -- cgit v1.2.3 From 6333e8f73b834f54e395a056e6002403f0862c51 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:43 +0200 Subject: static_call: Avoid kprobes on inline static_call()s Similar to how we disallow kprobes on any other dynamic text (ftrace/jump_label) also disallow kprobes on inline static_call()s. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200818135804.744920586@infradead.org --- include/linux/static_call.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 0d7f9efaa3b2..6f62ceda7dd9 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -110,6 +110,7 @@ struct static_call_key { extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); extern int static_call_mod_init(struct module *mod); +extern int static_call_text_reserved(void *start, void *end); #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -153,6 +154,11 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) cpus_read_unlock(); } +static inline int static_call_text_reserved(void *start, void *end) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) @@ -182,6 +188,11 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) WRITE_ONCE(key->func, func); } +static inline int static_call_text_reserved(void *start, void *end) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) EXPORT_SYMBOL(STATIC_CALL_KEY(name)) #define EXPORT_STATIC_CALL_GPL(name) EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)) -- cgit v1.2.3 From 452cddbff74b6a15b9354505671011700fe03710 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:48 +0200 Subject: static_call: Add static_call_cond() Extend the static_call infrastructure to optimize the following common pattern: if (func_ptr) func_ptr(args...) For the trampoline (which is in effect a tail-call), we patch the JMP.d32 into a RET, which then directly consumes the trampoline call. For the in-line sites we replace the CALL with a NOP5. NOTE: this is 'obviously' limited to functions with a 'void' return type. NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed to a full function. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.042977182@infradead.org --- include/linux/static_call.h | 86 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 6f62ceda7dd9..0f74581e0e2f 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -16,7 +16,9 @@ * * DECLARE_STATIC_CALL(name, func); * DEFINE_STATIC_CALL(name, func); + * DEFINE_STATIC_CALL_NULL(name, typename); * static_call(name)(args...); + * static_call_cond(name)(args...); * static_call_update(name, func); * * Usage example: @@ -52,6 +54,43 @@ * rather than calling through the trampoline. This requires objtool or a * compiler plugin to detect all the static_call() sites and annotate them * in the .static_call_sites section. + * + * + * Notes on NULL function pointers: + * + * Static_call()s support NULL functions, with many of the caveats that + * regular function pointers have. + * + * Clearly calling a NULL function pointer is 'BAD', so too for + * static_call()s (although when HAVE_STATIC_CALL it might not be immediately + * fatal). A NULL static_call can be the result of: + * + * DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int)); + * + * which is equivalent to declaring a NULL function pointer with just a + * typename: + * + * void (*my_func_ptr)(int arg1) = NULL; + * + * or using static_call_update() with a NULL function. In both cases the + * HAVE_STATIC_CALL implementation will patch the trampoline with a RET + * instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE + * architectures can patch the trampoline call to a NOP. + * + * In all cases, any argument evaluation is unconditional. Unlike a regular + * conditional function pointer call: + * + * if (my_func_ptr) + * my_func_ptr(arg1) + * + * where the argument evaludation also depends on the pointer value. + * + * When calling a static_call that can be NULL, use: + * + * static_call_cond(name)(arg1); + * + * which will include the required value tests to avoid NULL-pointer + * dereferences. */ #include @@ -120,7 +159,16 @@ extern int static_call_text_reserved(void *start, void *end); }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + .type = 1, \ + }; \ + ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call(name) __static_call(name) +#define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ @@ -143,7 +191,15 @@ struct static_call_key { }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + }; \ + ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) + #define static_call(name) __static_call(name) +#define static_call_cond(name) (void)__static_call(name) static inline void __static_call_update(struct static_call_key *key, void *tramp, void *func) @@ -179,9 +235,39 @@ struct static_call_key { .func = _func, \ } +#define DEFINE_STATIC_CALL_NULL(name, _func) \ + DECLARE_STATIC_CALL(name, _func); \ + struct static_call_key STATIC_CALL_KEY(name) = { \ + .func = NULL, \ + } + #define static_call(name) \ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) +static inline void __static_call_nop(void) { } + +/* + * This horrific hack takes care of two things: + * + * - it ensures the compiler will only load the function pointer ONCE, + * which avoids a reload race. + * + * - it ensures the argument evaluation is unconditional, similar + * to the HAVE_STATIC_CALL variant. + * + * Sadly current GCC/Clang (10 for both) do not optimize this properly + * and will emit an indirect call for the NULL case :-( + */ +#define __static_call_cond(name) \ +({ \ + void *func = READ_ONCE(STATIC_CALL_KEY(name).func); \ + if (!func) \ + func = &__static_call_nop; \ + (typeof(STATIC_CALL_TRAMP(name))*)func; \ +}) + +#define static_call_cond(name) (void)__static_call_cond(name) + static inline void __static_call_update(struct static_call_key *key, void *tramp, void *func) { -- cgit v1.2.3 From 5b06fd3bb9cdce4f3e731c48eb5b74c4acc47997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:49 +0200 Subject: static_call: Handle tail-calls GCC can turn our static_call(name)(args...) into a tail call, in which case we get a JMP.d32 into the trampoline (which then does a further tail-call). Teach objtool to recognise and mark these in .static_call_sites and adjust the code patching to deal with this. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.101186767@infradead.org --- include/linux/static_call.h | 4 ++-- include/linux/static_call_types.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 0f74581e0e2f..519bd666e096 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -103,7 +103,7 @@ /* * Either @site or @tramp can be NULL. */ -extern void arch_static_call_transform(void *site, void *tramp, void *func); +extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail); #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name) @@ -206,7 +206,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) { cpus_read_lock(); WRITE_ONCE(key->func, func); - arch_static_call_transform(NULL, tramp, func); + arch_static_call_transform(NULL, tramp, func, false); cpus_read_unlock(); } diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 408d345d83e1..89135bb35bf7 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -16,6 +16,13 @@ #define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name) #define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name)) +/* + * Flags in the low bits of static_call_site::key. + */ +#define STATIC_CALL_SITE_TAIL 1UL /* tail call */ +#define STATIC_CALL_SITE_INIT 2UL /* init section */ +#define STATIC_CALL_SITE_FLAGS 3UL + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). -- cgit v1.2.3 From a945c8345ec0decb2f1a7f19a8c5e60bcb1dd1eb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Aug 2020 15:57:51 +0200 Subject: static_call: Allow early init In order to use static_call() to wire up x86_pmu, we need to initialize earlier, specifically before memory allocation works; copy some of the tricks from jump_label to enable this. Primarily we overload key->next to store a sites pointer when there are no modules, this avoids having to use kmalloc() to initialize the sites and allows us to run much earlier. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20200818135805.220737930@infradead.org --- include/linux/static_call.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 519bd666e096..bfa2ba39be57 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -136,6 +136,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #ifdef CONFIG_HAVE_STATIC_CALL_INLINE +extern void __init static_call_init(void); + struct static_call_mod { struct static_call_mod *next; struct module *mod; /* for vmlinux, mod == NULL */ @@ -144,7 +146,12 @@ struct static_call_mod { struct static_call_key { void *func; - struct static_call_mod *mods; + union { + /* bit 0: 0 = mods, 1 = sites */ + unsigned long type; + struct static_call_mod *mods; + struct static_call_site *sites; + }; }; extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); @@ -155,7 +162,7 @@ extern int static_call_text_reserved(void *start, void *end); DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ .func = _func, \ - .mods = NULL, \ + .type = 1, \ }; \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) @@ -180,6 +187,8 @@ extern int static_call_text_reserved(void *start, void *end); #elif defined(CONFIG_HAVE_STATIC_CALL) +static inline void static_call_init(void) { } + struct static_call_key { void *func; }; @@ -225,6 +234,8 @@ static inline int static_call_text_reserved(void *start, void *end) #else /* Generic implementation */ +static inline void static_call_init(void) { } + struct static_call_key { void *func; }; -- cgit v1.2.3 From d25e37d89dd2f41d7acae0429039d2f0ae8b4a07 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 18 Aug 2020 15:57:52 +0200 Subject: tracepoint: Optimize using static_call() Currently the tracepoint site will iterate a vector and issue indirect calls to however many handlers are registered (ie. the vector is long). Using static_call() it is possible to optimize this for the common case of only having a single handler registered. In this case the static_call() can directly call this handler. Otherwise, if the vector is longer than 1, call a function that iterates the whole vector like the current code. [peterz: updated to new interface] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/20200818135805.279421092@infradead.org --- include/linux/tracepoint-defs.h | 5 +++ include/linux/tracepoint.h | 86 +++++++++++++++++++++++++++++------------ 2 files changed, 66 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index b29950a19205..de97450cf190 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -11,6 +11,8 @@ #include #include +struct static_call_key; + struct trace_print_flags { unsigned long mask; const char *name; @@ -30,6 +32,9 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ struct static_key key; + struct static_call_key *static_call_key; + void *static_call_tramp; + void *iterator; int (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 598fec9f9dbf..3722a10fc46d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -19,6 +19,7 @@ #include #include #include +#include struct module; struct tracepoint; @@ -92,7 +93,9 @@ extern int syscall_regfunc(void); extern void syscall_unregfunc(void); #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ +#ifndef PARAMS #define PARAMS(args...) args +#endif #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) @@ -148,6 +151,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #ifdef TRACEPOINTS_ENABLED +#ifdef CONFIG_HAVE_STATIC_CALL +#define __DO_TRACE_CALL(name) static_call(tp_func_##name) +#else +#define __DO_TRACE_CALL(name) __tracepoint_iter_##name +#endif /* CONFIG_HAVE_STATIC_CALL */ + /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. @@ -157,12 +166,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * has a "void" prototype, then it is invalid to declare a function * as "(void *, void)". */ -#define __DO_TRACE(tp, proto, args, cond, rcuidle) \ +#define __DO_TRACE(name, proto, args, cond, rcuidle) \ do { \ struct tracepoint_func *it_func_ptr; \ - void *it_func; \ - void *__data; \ int __maybe_unused __idx = 0; \ + void *__data; \ \ if (!(cond)) \ return; \ @@ -182,14 +190,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) rcu_irq_enter_irqson(); \ } \ \ - it_func_ptr = rcu_dereference_raw((tp)->funcs); \ - \ + it_func_ptr = \ + rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ - do { \ - it_func = (it_func_ptr)->func; \ - __data = (it_func_ptr)->data; \ - ((void(*)(proto))(it_func))(args); \ - } while ((++it_func_ptr)->func); \ + __data = (it_func_ptr)->data; \ + __DO_TRACE_CALL(name)(args); \ } \ \ if (rcuidle) { \ @@ -205,7 +210,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline void trace_##name##_rcuidle(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ + __DO_TRACE(name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), 1); \ @@ -227,11 +232,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * poking RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ + extern int __tracepoint_iter_##name(data_proto); \ + DECLARE_STATIC_CALL(tp_func_##name, __tracepoint_iter_##name); \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ + __DO_TRACE(name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), 0); \ @@ -277,21 +284,50 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. */ -#define DEFINE_TRACE_FN(name, reg, unreg) \ - static const char __tpstrtab_##name[] \ - __section(__tracepoints_strings) = #name; \ - struct tracepoint __tracepoint_##name __used \ - __section(__tracepoints) = \ - { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ - __TRACEPOINT_ENTRY(name); +#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ + static const char __tpstrtab_##_name[] \ + __section(__tracepoints_strings) = #_name; \ + extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ + int __tracepoint_iter_##_name(void *__data, proto); \ + struct tracepoint __tracepoint_##_name __used \ + __section(__tracepoints) = { \ + .name = __tpstrtab_##_name, \ + .key = STATIC_KEY_INIT_FALSE, \ + .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ + .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ + .iterator = &__tracepoint_iter_##_name, \ + .regfunc = _reg, \ + .unregfunc = _unreg, \ + .funcs = NULL }; \ + __TRACEPOINT_ENTRY(_name); \ + int __tracepoint_iter_##_name(void *__data, proto) \ + { \ + struct tracepoint_func *it_func_ptr; \ + void *it_func; \ + \ + it_func_ptr = \ + rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ + do { \ + it_func = (it_func_ptr)->func; \ + __data = (it_func_ptr)->data; \ + ((void(*)(void *, proto))(it_func))(__data, args); \ + } while ((++it_func_ptr)->func); \ + return 0; \ + } \ + DEFINE_STATIC_CALL(tp_func_##_name, __tracepoint_iter_##_name); -#define DEFINE_TRACE(name) \ - DEFINE_TRACE_FN(name, NULL, NULL); +#define DEFINE_TRACE(name, proto, args) \ + DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ - EXPORT_SYMBOL_GPL(__tracepoint_##name) + EXPORT_SYMBOL_GPL(__tracepoint_##name); \ + EXPORT_SYMBOL_GPL(__tracepoint_iter_##name); \ + EXPORT_STATIC_CALL_GPL(tp_func_##name) #define EXPORT_TRACEPOINT_SYMBOL(name) \ - EXPORT_SYMBOL(__tracepoint_##name) + EXPORT_SYMBOL(__tracepoint_##name); \ + EXPORT_SYMBOL(__tracepoint_iter_##name); \ + EXPORT_STATIC_CALL(tp_func_##name) + #else /* !TRACEPOINTS_ENABLED */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ @@ -320,8 +356,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return false; \ } -#define DEFINE_TRACE_FN(name, reg, unreg) -#define DEFINE_TRACE(name) +#define DEFINE_TRACE_FN(name, reg, unreg, proto, args) +#define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -- cgit v1.2.3 From f53fa968a7344970b8f8a5707c39cdcf17a6f367 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 May 2020 15:42:35 +0200 Subject: scif: Fix spelling of EACCES As per POSIX, the correct spelling is EACCES: include/uapi/asm-generic/errno-base.h:#define EACCES 13 /* Permission denied */ Signed-off-by: Geert Uytterhoeven Signed-off-by: Jiri Kosina --- include/linux/scif.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scif.h b/include/linux/scif.h index eeb250b73c4b..329e695b8fe5 100644 --- a/include/linux/scif.h +++ b/include/linux/scif.h @@ -657,7 +657,7 @@ int scif_unregister(scif_epd_t epd, off_t offset, size_t len); * the negative of one of the following errors is returned. * * Errors: - * EACCESS - Attempt to write to a read-only range + * EACCES - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer * EINVAL - rma_flags is invalid @@ -733,7 +733,7 @@ int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t * the negative of one of the following errors is returned. * * Errors: - * EACCESS - Attempt to write to a read-only range + * EACCES - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer * EINVAL - rma_flags is invalid @@ -815,7 +815,7 @@ int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t * the negative of one of the following errors is returned. * * Errors: - * EACCESS - Attempt to write to a read-only range + * EACCES - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer * EINVAL - rma_flags is invalid @@ -895,7 +895,7 @@ int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, * the negative of one of the following errors is returned. * * Errors: - * EACCESS - Attempt to write to a read-only range + * EACCES - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer * EINVAL - rma_flags is invalid -- cgit v1.2.3 From db04e18dbb0146d3c753dc05f7233350375bbc48 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 14:34:03 +0200 Subject: block: Make request_queue.rpm_status an enum request_queue.rpm_status is assigned values of the rpm_status enum only, so reflect that in its type. Note that including is (currently) a no-op, as it is already included through and , but it is better to play it safe. Signed-off-by: Geert Uytterhoeven Acked-by: Rafael J. Wysocki Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb5636cc17b9..0a1730b30ad2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include #include #include +#include struct module; struct scsi_ioctl_command; @@ -458,7 +459,7 @@ struct request_queue { #ifdef CONFIG_PM struct device *dev; - int rpm_status; + enum rpm_status rpm_status; unsigned int nr_pending; #endif -- cgit v1.2.3 From 611bee526b4a89d49f1b9914a770bfdc101d5fb5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:41 +0200 Subject: block: replace bd_set_size with bd_set_nr_sectors Replace bd_set_size with a version that takes the number of sectors instead, as that fits most of the current and future callers much better. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4ab853461dff..39025dc0397c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -375,7 +375,7 @@ void unregister_blkdev(unsigned int major, const char *name); int revalidate_disk(struct gendisk *disk); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void bd_set_size(struct block_device *bdev, loff_t size); +void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); /* for drivers/char/raw.c: */ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -- cgit v1.2.3 From c2b4bb8cb3741c0bacf3683e4c1ecd04c977ada3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:42 +0200 Subject: block: fix locking for struct block_device size updates Two different callers use two different mutexes for updating the block device size, which obviously doesn't help to actually protect against concurrent updates from the different callers. In addition one of the locks, bd_mutex is rather prone to deadlocks with other parts of the block stack that use it for high level synchronization. Switch to using a new spinlock protecting just the size updates, as that is all we need, and make sure everyone does the update through the proper helper. This fixes a bug reported with the nvme revalidating disks during a hot removal operation, which can currently deadlock on bd_mutex. Reported-by: Xianting Tian Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4ecf4fed171f..5accc2549d22 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -38,6 +38,7 @@ struct block_device { /* number of times partitions within this device have been opened. */ unsigned bd_part_count; int bd_invalidated; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; -- cgit v1.2.3 From f3256075ba49d80835b601bfbff350a2140b2924 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2020 17:37:45 +0200 Subject: block: remove the BIO_NULL_MAPPED flag We can simply use a boolean flag in the bio_map_data data structure instead. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5accc2549d22..78b073956884 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -257,7 +257,6 @@ enum { BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_USER_MAPPED, /* contains user pages */ - BIO_NULL_MAPPED, /* contains invalid user pages */ BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ -- cgit v1.2.3 From 3310eebafe6f9a872c1f757b3d822dafae9c0cd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Aug 2020 17:37:48 +0200 Subject: block: remove the BIO_USER_MAPPED flag Just check if there is private data, in which case the bio must have originated from bio_copy_user_iov. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 78b073956884..63a39e47fc60 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -256,7 +256,6 @@ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ - BIO_USER_MAPPED, /* contains user pages */ BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ -- cgit v1.2.3 From bdc6a287bc98e8f32bf52c9cb2d1bdf75975f5a0 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 28 Aug 2020 10:52:55 +0800 Subject: block: Move blk_mq_bio_list_merge() into blk-merge.c Move the blk_mq_bio_list_merge() into blk-merge.c and rename it as a generic name. Reviewed-by: Christoph Hellwig Signed-off-by: Baolin Wang Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9d2d5ad367a4..21a02e0577dd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -489,8 +489,6 @@ void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); -bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio, unsigned int nr_segs); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 7b8917f5e29c377be1db5680249fe30e038cb3eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Aug 2020 20:02:33 +0200 Subject: block: remove the alignment_offset field from struct hd_struct The alignment offset is only used in slow path callers, so just calculate it on the fly. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- include/linux/genhd.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0a1730b30ad2..ba1f5f5e11c6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1456,10 +1456,9 @@ static inline int bdev_alignment_offset(struct block_device *bdev) if (q->limits.misaligned) return -1; - if (bdev != bdev->bd_contains) - return bdev->bd_part->alignment_offset; - + return queue_limit_alignment_offset(&q->limits, + bdev->bd_part->start_sect); return q->limits.alignment_offset; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 39025dc0397c..bfa411c80dbb 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,7 +65,6 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; - sector_t alignment_offset; unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; -- cgit v1.2.3 From 7cf34d97ab45203b975396393ded9d3867dfa8bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Aug 2020 20:02:34 +0200 Subject: block: remove the discard_alignment field from struct hd_struct The alignment offset is only used in slow path callers, so just calculate it on the fly. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- include/linux/genhd.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba1f5f5e11c6..d0d61bc81615 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1498,8 +1498,8 @@ static inline int bdev_discard_alignment(struct block_device *bdev) struct request_queue *q = bdev_get_queue(bdev); if (bdev != bdev->bd_contains) - return bdev->bd_part->discard_alignment; - + return queue_limit_discard_alignment(&q->limits, + bdev->bd_part->start_sect); return q->limits.discard_alignment; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index bfa411c80dbb..9ea2ca31c278 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,7 +65,6 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; - unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3 From 46d40cfad13ccbd0739019d754d46d8f93e1d5aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Aug 2020 20:02:35 +0200 Subject: block: remove an outdated comment on the bd_dev field kdev_t is long gone, so we don't need to comment a field isn't one.. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 63a39e47fc60..59d9150165c4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,7 +20,7 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { - dev_t bd_dev; /* not a kdev_t - it's a search key */ + dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; -- cgit v1.2.3 From a8c7ffdb5fdde3a57c0b654f66f4d81325abe69f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:56 +0200 Subject: mtd: nand: Introduce the ECC engine framework Create a generic ECC engine framework. This is a base to instantiate ECC engine objects. If we really want to be generic, bindings must evolve, so here is the new logic. The following three properties are mutually exclusive: - The nand-no-ecc-engine boolean property is set and there is no ECC engine to retrieve. - The nand-use-soft-ecc-engine boolean property is set and the core will force using the use of software correction. - There is a nand-ecc-engine property pointing at a node which will act as ECC engine. It the later case, the property may reference: - The NAND chip node itself (for the on-die ECC case). - The parent node if the NAND controller embeds an ECC engine. - Any other node being an external ECC controller as well. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-9-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 114 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index e754a6fc8a4b..8cf5bdbea782 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -127,6 +127,40 @@ struct nand_page_io_req { int mode; }; +const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void); +const struct mtd_ooblayout_ops *nand_get_large_page_ooblayout(void); +const struct mtd_ooblayout_ops *nand_get_large_page_hamming_ooblayout(void); + +/** + * enum nand_ecc_engine_type - NAND ECC engine type + * @NAND_ECC_ENGINE_TYPE_INVALID: Invalid value + * @NAND_ECC_ENGINE_TYPE_NONE: No ECC correction + * @NAND_ECC_ENGINE_TYPE_SOFT: Software ECC correction + * @NAND_ECC_ENGINE_TYPE_ON_HOST: On host hardware ECC correction + * @NAND_ECC_ENGINE_TYPE_ON_DIE: On chip hardware ECC correction + */ +enum nand_ecc_engine_type { + NAND_ECC_ENGINE_TYPE_INVALID, + NAND_ECC_ENGINE_TYPE_NONE, + NAND_ECC_ENGINE_TYPE_SOFT, + NAND_ECC_ENGINE_TYPE_ON_HOST, + NAND_ECC_ENGINE_TYPE_ON_DIE, +}; + +/** + * enum nand_ecc_placement - NAND ECC bytes placement + * @NAND_ECC_PLACEMENT_UNKNOWN: The actual position of the ECC bytes is unknown + * @NAND_ECC_PLACEMENT_OOB: The ECC bytes are located in the OOB area + * @NAND_ECC_PLACEMENT_INTERLEAVED: Syndrome layout, there are ECC bytes + * interleaved with regular data in the main + * area + */ +enum nand_ecc_placement { + NAND_ECC_PLACEMENT_UNKNOWN, + NAND_ECC_PLACEMENT_OOB, + NAND_ECC_PLACEMENT_INTERLEAVED, +}; + /** * enum nand_ecc_algo - NAND ECC algorithm * @NAND_ECC_ALGO_UNKNOWN: Unknown algorithm @@ -143,16 +177,27 @@ enum nand_ecc_algo { /** * struct nand_ecc_props - NAND ECC properties + * @engine_type: ECC engine type + * @placement: OOB placement (if relevant) + * @algo: ECC algorithm (if relevant) * @strength: ECC strength * @step_size: Number of bytes per step + * @flags: Misc properties */ struct nand_ecc_props { + enum nand_ecc_engine_type engine_type; + enum nand_ecc_placement placement; + enum nand_ecc_algo algo; unsigned int strength; unsigned int step_size; + unsigned int flags; }; #define NAND_ECCREQ(str, stp) { .strength = (str), .step_size = (stp) } +/* NAND ECC misc flags */ +#define NAND_ECC_MAXIMIZE_STRENGTH BIT(0) + /** * struct nand_bbt - bad block table object * @cache: in memory BBT cache @@ -183,6 +228,75 @@ struct nand_ops { bool (*isbad)(struct nand_device *nand, const struct nand_pos *pos); }; +/** + * struct nand_ecc_context - Context for the ECC engine + * @conf: basic ECC engine parameters + * @total: total number of bytes used for storing ECC codes, this is used by + * generic OOB layouts + * @priv: ECC engine driver private data + */ +struct nand_ecc_context { + struct nand_ecc_props conf; + unsigned int total; + void *priv; +}; + +/** + * struct nand_ecc_engine_ops - ECC engine operations + * @init_ctx: given a desired user configuration for the pointed NAND device, + * requests the ECC engine driver to setup a configuration with + * values it supports. + * @cleanup_ctx: clean the context initialized by @init_ctx. + * @prepare_io_req: is called before reading/writing a page to prepare the I/O + * request to be performed with ECC correction. + * @finish_io_req: is called after reading/writing a page to terminate the I/O + * request and ensure proper ECC correction. + */ +struct nand_ecc_engine_ops { + int (*init_ctx)(struct nand_device *nand); + void (*cleanup_ctx)(struct nand_device *nand); + int (*prepare_io_req)(struct nand_device *nand, + struct nand_page_io_req *req); + int (*finish_io_req)(struct nand_device *nand, + struct nand_page_io_req *req); +}; + +/** + * struct nand_ecc_engine - ECC engine abstraction for NAND devices + * @ops: ECC engine operations + */ +struct nand_ecc_engine { + struct nand_ecc_engine_ops *ops; +}; + +void of_get_nand_ecc_user_config(struct nand_device *nand); +int nand_ecc_init_ctx(struct nand_device *nand); +void nand_ecc_cleanup_ctx(struct nand_device *nand); +int nand_ecc_prepare_io_req(struct nand_device *nand, + struct nand_page_io_req *req); +int nand_ecc_finish_io_req(struct nand_device *nand, + struct nand_page_io_req *req); +bool nand_ecc_is_strong_enough(struct nand_device *nand); + +/** + * struct nand_ecc - Information relative to the ECC + * @defaults: Default values, depend on the underlying subsystem + * @requirements: ECC requirements from the NAND chip perspective + * @user_conf: User desires in terms of ECC parameters + * @ctx: ECC context for the ECC engine, derived from the device @requirements + * the @user_conf and the @defaults + * @ondie_engine: On-die ECC engine reference, if any + * @engine: ECC engine actually bound + */ +struct nand_ecc { + struct nand_ecc_props defaults; + struct nand_ecc_props requirements; + struct nand_ecc_props user_conf; + struct nand_ecc_context ctx; + struct nand_ecc_engine *ondie_engine; + struct nand_ecc_engine *engine; +}; + /** * struct nand_device - NAND device * @mtd: MTD instance attached to the NAND device -- cgit v1.2.3 From 0f50257f5348751bf5b93d052885de23b2367cd2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Aug 2020 21:16:55 -0700 Subject: dma-buf: fix kernel-doc warning in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc warning in : ../include/linux/dma-buf.h:330: warning: Function parameter or member 'name_lock' not described in 'dma_buf' Signed-off-by: Randy Dunlap Cc: Sumit Semwal Cc: Gustavo Padovan Cc: Christian König Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/388523/ Signed-off-by: Christian König --- include/linux/dma-buf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index a2ca294eaebe..957b398d30e5 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,7 @@ struct dma_buf_ops { * @exp_name: name of the exporter; useful for debugging. * @name: userspace-provided name; useful for accounting and debugging, * protected by @resv. + * @name_lock: spinlock to protect name access * @owner: pointer to exporter module; used for refcounting when exporter is a * kernel module. * @list_node: node for dma_buf accounting and debugging. @@ -311,7 +312,7 @@ struct dma_buf { void *vmap_ptr; const char *exp_name; const char *name; - spinlock_t name_lock; /* spinlock to protect name access */ + spinlock_t name_lock; struct module *owner; struct list_head list_node; void *priv; -- cgit v1.2.3 From f4ad06f2bb8476548b08f89919ee65abc4e40212 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:42 +0200 Subject: block: rename bd_invalidated Replace bd_invalidate with a new BDEV_NEED_PART_SCAN flag in a bd_flags variable to better describe the condition. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 59d9150165c4..6ffa783e1633 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -19,6 +19,8 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; +#define BDEV_NEED_PART_SCAN 0 + struct block_device { dev_t bd_dev; int bd_openers; @@ -37,7 +39,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - int bd_invalidated; + unsigned long bd_flags; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; -- cgit v1.2.3 From 659e56ba864d37b7ee0a49cd432205b2a5ca815e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:43 +0200 Subject: block: add a new revalidate_disk_size helper revalidate_disk is a relative awkward helper for driver use, as it first calls an optional driver method and then updates the block device size, while most callers either don't need the method call at all, or want to keep state between the caller and the called method. Add a revalidate_disk_size helper that just performs the update of the block device size from the gendisk one, and switch all drivers that do not implement ->revalidate_disk to use the new helper instead of revalidate_disk() Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Acked-by: Song Liu Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9ea2ca31c278..f76c8baf6b7d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -371,6 +371,7 @@ int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); int revalidate_disk(struct gendisk *disk); +void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); -- cgit v1.2.3 From b8086d3f5a0e88b1912d55a158b8a6a43ad6604b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:44 +0200 Subject: block: use revalidate_disk_size in set_capacity_revalidate_and_notify Only virtio_blk and xen-blkfront set the revalidate argument to true, and both do not implement the ->revalidate_disk method. So switch to the helper that just updates the size instead. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/genhd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f76c8baf6b7d..02a73198b289 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -313,8 +313,8 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -extern void set_capacity_revalidate_and_notify(struct gendisk *disk, - sector_t size, bool revalidate); +void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, + bool update_bdev); extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); /* drivers/char/random.c */ -- cgit v1.2.3 From de09077c89183cbc627d9393706343662da7f5a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:48 +0200 Subject: block: remove revalidate_disk() Remove the now unused helper. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Acked-by: Song Liu Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 02a73198b289..c618b27292fc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -370,7 +370,6 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); -int revalidate_disk(struct gendisk *disk); void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -- cgit v1.2.3 From 21f8e4828c44da39b0670c5d99d5728b739542a1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 2 Sep 2020 17:18:43 +0300 Subject: regmap: Add can_sleep configuration option Regmap can't sleep if spinlock is used for the locking protection. This patch fixes regression caused by a previous commit that switched regmap to use fsleep() and this broke Amlogic S922X platform. This patch adds new configuration option for regmap users, allowing to specify whether regmap operations can sleep and assuming that sleep is allowed if mutex is used for the regmap locking protection. Reported-by: Marek Szyprowski Fixes: 2b32d2f7ce0a ("regmap: Use flexible sleep") Signed-off-by: Dmitry Osipenko Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20200902141843.6591-1-digetx@gmail.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index d865d8fea535..0c49d59168b5 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -342,6 +342,7 @@ typedef void (*regmap_unlock)(void *); * @hwlock_id: Specify the hardware spinlock id. * @hwlock_mode: The hardware spinlock mode, should be HWLOCK_IRQSTATE, * HWLOCK_IRQ or 0. + * @can_sleep: Optional, specifies whether regmap operations can sleep. */ struct regmap_config { const char *name; @@ -398,6 +399,8 @@ struct regmap_config { bool use_hwlock; unsigned int hwlock_id; unsigned int hwlock_mode; + + bool can_sleep; }; /** -- cgit v1.2.3 From 7547dbd3b198f309aaff54e3528898a8a196faff Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 28 Aug 2020 14:05:07 +0300 Subject: dmaengine: Mark dma_request_slave_channel() deprecated New drivers should use dma_request_chan() instead dma_request_slave_channel() dma_request_slave_channel() is a simple wrapper for dma_request_chan() eating up the error code for channel request failure and makes deferred probing impossible. Move the dma_request_slave_channel() into the header as inline function, mark it as deprecated. Signed-off-by: Peter Ujfalusi Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200828110507.22407-1-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 011371b7f081..dd357a747780 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1472,7 +1472,6 @@ void dma_issue_pending_all(void); struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param, struct device_node *np); -struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name); struct dma_chan *dma_request_chan(struct device *dev, const char *name); struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask); @@ -1502,11 +1501,6 @@ static inline struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, { return NULL; } -static inline struct dma_chan *dma_request_slave_channel(struct device *dev, - const char *name) -{ - return NULL; -} static inline struct dma_chan *dma_request_chan(struct device *dev, const char *name) { @@ -1575,6 +1569,15 @@ void dma_run_dependencies(struct dma_async_tx_descriptor *tx); #define dma_request_channel(mask, x, y) \ __dma_request_channel(&(mask), x, y, NULL) +/* Deprecated, please use dma_request_chan() directly */ +static inline struct dma_chan * __deprecated +dma_request_slave_channel(struct device *dev, const char *name) +{ + struct dma_chan *ch = dma_request_chan(dev, name); + + return IS_ERR(ch) ? NULL : ch; +} + static inline struct dma_chan *dma_request_slave_channel_compat(const dma_cap_mask_t mask, dma_filter_fn fn, void *fn_param, -- cgit v1.2.3 From 63642595a78da42f841fabcc3f309f4c1362dc40 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 31 Aug 2020 21:43:17 +0800 Subject: soundwire: add definition for maximum number of ports A Device may have at most 15 physical ports (DP0, DP1..DP14). Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20200831134318.11443-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 76052f12c9f7..0aa4c6af7554 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -38,7 +38,8 @@ struct sdw_slave; #define SDW_FRAME_CTRL_BITS 48 #define SDW_MAX_DEVICES 11 -#define SDW_VALID_PORT_RANGE(n) ((n) <= 14 && (n) >= 1) +#define SDW_MAX_PORTS 15 +#define SDW_VALID_PORT_RANGE(n) ((n) < SDW_MAX_PORTS && (n) >= 1) enum { SDW_PORT_DIRN_SINK = 0, -- cgit v1.2.3 From 6073755886a463a7a7aecdd0abb32a1d38bdb7e6 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 31 Aug 2020 21:43:18 +0800 Subject: soundwire: fix port_ready[] dynamic allocation in mipi_disco The existing code allocates memory for the total number of ports. This only works if the ports are contiguous, but will break if e.g. a Devices uses port0, 1, and 14. The port_ready[] array would contain 3 elements, which would lead to an out-of-bounds access. Conversely in other cases, the wrong port index would be used leading to timeouts on prepare. This can be fixed by allocating for the worst-case of 15 ports (DP0..DP14). In addition since the number is now fixed, we can use an array instead of a dynamic allocation. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20200831134318.11443-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 0aa4c6af7554..63e71645fd13 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -619,7 +619,7 @@ struct sdw_slave { struct dentry *debugfs; #endif struct list_head node; - struct completion *port_ready; + struct completion port_ready[SDW_MAX_PORTS]; enum sdw_clk_stop_mode curr_clk_stop_mode; u16 dev_num; u16 dev_num_sticky; -- cgit v1.2.3 From 88d7c71ea5b29b322d9c72103a196234cb5040db Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 1 Sep 2020 23:05:51 +0800 Subject: soundwire: bus: update multi-link definition with hw sync details Hardware-based synchronization is typically required when the bus->multi_link flag is set. On Intel platforms, when the Cadence IP is configured in 'Multi Master Mode', the hardware synchronization is required even when a stream only uses a single segment. The existing code only deal with hardware synchronization when a stream uses more than one segment so to remain backwards compatible we add a configuration threshold. For Intel cases this threshold will be set to one, other platforms may be able to use the SSP-based sync in those cases. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20200901150556.19432-6-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 63e71645fd13..78f52cdeb2c9 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -828,6 +828,11 @@ struct sdw_master_ops { * @multi_link: Store bus property that indicates if multi links * are supported. This flag is populated by drivers after reading * appropriate firmware (ACPI/DT). + * @hw_sync_min_links: Number of links used by a stream above which + * hardware-based synchronization is required. This value is only + * meaningful if multi_link is set. If set to 1, hardware-based + * synchronization will be used even if a stream only uses a single + * SoundWire segment. */ struct sdw_bus { struct device *dev; @@ -851,6 +856,7 @@ struct sdw_bus { unsigned int clk_stop_timeout; u32 bank_switch_timeout; bool multi_link; + int hw_sync_min_links; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, -- cgit v1.2.3 From 1e9d90dbed120ec98517428ffff4dacd9797e39d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 1 Sep 2020 15:16:45 -0700 Subject: dma-mapping: introduce dma_get_seg_boundary_nr_pages() We found that callers of dma_get_seg_boundary mostly do an ALIGN with page mask and then do a page shift to get number of pages: ALIGN(boundary + 1, 1 << shift) >> shift However, the boundary might be as large as ULONG_MAX, which means that a device has no specific boundary limit. So either "+ 1" or passing it to ALIGN() would potentially overflow. According to kernel defines: #define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) ALIGN_MASK(x, (typeof(x))(a) - 1) We can simplify the logic here into a helper function doing: ALIGN(boundary + 1, 1 << shift) >> shift = ALIGN_MASK(b + 1, (1 << s) - 1) >> s = {[b + 1 + (1 << s) - 1] & ~[(1 << s) - 1]} >> s = [b + 1 + (1 << s) - 1] >> s = [b + (1 << s)] >> s = (b >> s) + 1 This patch introduces and applies dma_get_seg_boundary_nr_pages() as an overflow-free helper for the dma_get_seg_boundary() callers to get numbers of pages. It also takes care of the NULL dev case for non-DMA API callers. Suggested-by: Christoph Hellwig Signed-off-by: Nicolin Chen Acked-by: Niklas Schnelle Acked-by: Michael Ellerman (powerpc) Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 52635e91143b..faab0a8210b9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -632,6 +632,25 @@ static inline unsigned long dma_get_seg_boundary(struct device *dev) return DMA_BIT_MASK(32); } +/** + * dma_get_seg_boundary_nr_pages - return the segment boundary in "page" units + * @dev: device to guery the boundary for + * @page_shift: ilog() of the IOMMU page size + * + * Return the segment boundary in IOMMU page units (which may be different from + * the CPU page size) for the passed in device. + * + * If @dev is NULL a boundary of U32_MAX is assumed, this case is just for + * non-DMA API callers. + */ +static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, + unsigned int page_shift) +{ + if (!dev) + return (U32_MAX >> page_shift) + 1; + return (dma_get_seg_boundary(dev) >> page_shift) + 1; +} + static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) { if (dev->dma_parms) { -- cgit v1.2.3 From 135ba11a7a07b4ce9197d9fa4b196329a57f1e06 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 1 Sep 2020 15:16:46 -0700 Subject: dma-mapping: set default segment_boundary_mask to ULONG_MAX The default segment_boundary_mask was set to DMA_BIT_MAKS(32) a decade ago by referencing SCSI/block subsystem, as a 32-bit mask was good enough for most of the devices. Now more and more drivers set dma_masks above DMA_BIT_MAKS(32) while only a handful of them call dma_set_seg_boundary(). This means that most drivers have a 4GB segmention boundary because DMA API returns a 32-bit default value, though they might not really have such a limit. The default segment_boundary_mask should mean "no limit" since the device doesn't explicitly set the mask. But a 32-bit mask certainly limits those devices capable of 32+ bits addressing. So this patch sets default segment_boundary_mask to ULONG_MAX. Signed-off-by: Nicolin Chen Acked-by: Niklas Schnelle Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index faab0a8210b9..df0bff2ea750 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,7 +629,7 @@ static inline unsigned long dma_get_seg_boundary(struct device *dev) { if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) return dev->dma_parms->segment_boundary_mask; - return DMA_BIT_MASK(32); + return ULONG_MAX; } /** -- cgit v1.2.3 From 51db1c37ee166159c5753ce8d64d6bacf113e0f0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 19 Aug 2020 23:20:19 +0800 Subject: blk-mq: Rename BLK_MQ_F_TAG_SHARED as BLK_MQ_F_TAG_QUEUE_SHARED BLK_MQ_F_TAG_SHARED actually means that tags is shared among request queues, all of which should belong to LUNs attached to same HBA. So rename it to make the point explicitly. [jpg: rebase a few times, add rnbd-clt.c change] Suggested-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: John Garry Tested-by: Douglas Gilbert Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 21a02e0577dd..982c4f92b63c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -378,7 +378,7 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, - BLK_MQ_F_TAG_SHARED = 1 << 1, + BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: -- cgit v1.2.3 From 32bc15afed04bd73e29d713d8db47818d6aa89af Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 19 Aug 2020 23:20:24 +0800 Subject: blk-mq: Facilitate a shared sbitmap per tagset Some SCSI HBAs (such as HPSA, megaraid, mpt3sas, hisi_sas_v3 ..) support multiple reply queues with single hostwide tags. In addition, these drivers want to use interrupt assignment in pci_alloc_irq_vectors(PCI_IRQ_AFFINITY). However, as discussed in [0], CPU hotplug may cause in-flight IO completion to not be serviced when an interrupt is shutdown. That problem is solved in commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline"). However, to take advantage of that blk-mq feature, the HBA HW queuess are required to be mapped to that of the blk-mq hctx's; to do that, the HBA HW queues need to be exposed to the upper layer. In making that transition, the per-SCSI command request tags are no longer unique per Scsi host - they are just unique per hctx. As such, the HBA LLDD would have to generate this tag internally, which has a certain performance overhead. However another problem is that blk-mq assumes the host may accept (Scsi_host.can_queue * #hw queue) commands. In commit 6eb045e092ef ("scsi: core: avoid host-wide host_busy counter for scsi_mq"), the Scsi host busy counter was removed, which would stop the LLDD being sent more than .can_queue commands; however, it should still be ensured that the block layer does not issue more than .can_queue commands to the Scsi host. To solve this problem, introduce a shared sbitmap per blk_mq_tag_set, which may be requested at init time. New flag BLK_MQ_F_TAG_HCTX_SHARED should be set when requesting the tagset to indicate whether the shared sbitmap should be used. Even when BLK_MQ_F_TAG_HCTX_SHARED is set, a full set of tags and requests are still allocated per hctx; the reason for this is that if tags and requests were only allocated for a single hctx - like hctx0 - it may break block drivers which expect a request be associated with a specific hctx, i.e. not always hctx0. This will introduce extra memory usage. This change is based on work originally from Ming Lei in [1] and from Bart's suggestion in [2]. [0] https://lore.kernel.org/linux-block/alpine.DEB.2.21.1904051331270.1802@nanos.tec.linutronix.de/ [1] https://lore.kernel.org/linux-block/20190531022801.10003-1-ming.lei@redhat.com/ [2] https://lore.kernel.org/linux-block/ff77beff-5fd9-9f05-12b6-826922bace1f@huawei.com/T/#m3db0a602f095cbcbff27e9c884d6b4ae826144be Signed-off-by: John Garry Tested-by: Don Brace #SCSI resv cmds patches used Tested-by: Douglas Gilbert Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 982c4f92b63c..df7b903ce7ae 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -231,6 +231,9 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. + * @__bitmap_tags: A shared tags sbitmap, used over all hctx's + * @__breserved_tags: + * A shared reserved tags sbitmap, used over all hctx's * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. * @tag_list_lock: Serializes tag_list accesses. @@ -250,6 +253,8 @@ struct blk_mq_tag_set { unsigned int flags; void *driver_data; + struct sbitmap_queue __bitmap_tags; + struct sbitmap_queue __breserved_tags; struct blk_mq_tags **tags; struct mutex tag_list_lock; @@ -384,6 +389,7 @@ enum { * completing IO: */ BLK_MQ_F_STACKING = 1 << 2, + BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, -- cgit v1.2.3 From bccf5e26d99c28980bd6ced474422a1b18402263 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 19 Aug 2020 23:20:26 +0800 Subject: blk-mq: Record nr_active_requests per queue for when using shared sbitmap The per-hctx nr_active value can no longer be used to fairly assign a share of tag depth per request queue for when using a shared sbitmap, as it does not consider that the tags are shared tags over all hctx's. For this case, record the nr_active_requests per request_queue, and make the judgement based on that value. Co-developed-with: Kashyap Desai Signed-off-by: John Garry Tested-by: Don Brace #SCSI resv cmds patches used Tested-by: Douglas Gilbert Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d0d61bc81615..6277aee2aeaa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -485,6 +485,8 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; + atomic_t nr_active_requests_shared_sbitmap; + struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); -- cgit v1.2.3 From f1b49fdc1c64db110aa1315831e5fe0f8599fa56 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 19 Aug 2020 23:20:27 +0800 Subject: blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap For when using a shared sbitmap, no longer should the number of active request queues per hctx be relied on for when judging how to share the tag bitmap. Instead maintain the number of active request queues per tag_set, and make the judgement based on that. Originally-from: Kashyap Desai Signed-off-by: John Garry Tested-by: Don Brace #SCSI resv cmds patches used Tested-by: Douglas Gilbert Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index df7b903ce7ae..8279c807e1f3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -252,6 +252,7 @@ struct blk_mq_tag_set { unsigned int timeout; unsigned int flags; void *driver_data; + atomic_t active_queues_shared_sbitmap; struct sbitmap_queue __bitmap_tags; struct sbitmap_queue __breserved_tags; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6277aee2aeaa..7d82959e7b86 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -618,6 +618,7 @@ struct request_queue { #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ +#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP)) -- cgit v1.2.3 From b445547ec1bbd3e7bf4b1c142550942f70527d95 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Wed, 19 Aug 2020 23:20:28 +0800 Subject: blk-mq, elevator: Count requests per hctx to improve performance High CPU utilization on "native_queued_spin_lock_slowpath" due to lock contention is possible for mq-deadline and bfq IO schedulers when nr_hw_queues is more than one. It is because kblockd work queue can submit IO from all online CPUs (through blk_mq_run_hw_queues()) even though only one hctx has pending commands. The elevator callback .has_work for mq-deadline and bfq scheduler considers pending work if there are any IOs on request queue but it does not account hctx context. Add a per-hctx 'elevator_queued' count to the hctx to avoid triggering the elevator even though there are no requests queued. [jpg: Relocated atomic_dec() in dd_dispatch_request(), update commit message per Kashyap] Signed-off-by: Kashyap Desai Signed-off-by: Hannes Reinecke Signed-off-by: John Garry Tested-by: Douglas Gilbert Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8279c807e1f3..b23eeca4d677 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -139,6 +139,10 @@ struct blk_mq_hw_ctx { * shared across request queues. */ atomic_t nr_active; + /** + * @elevator_queued: Number of queued requests on hctx. + */ + atomic_t elevator_queued; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; -- cgit v1.2.3 From c1077616142907bb6ee987ecd136d6857ffd8787 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 1 Sep 2020 15:10:08 -0700 Subject: ip: expose inet sockopts through inet_diag Expose all exisiting inet sockopt bits through inet_diag for debug purpose. Corresponding changes in iproute2 ss will be submitted to output all these values. Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 0ef2d800fda7..84abb30a3fbb 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -75,6 +75,8 @@ static inline size_t inet_diag_msg_attrs_size(void) #ifdef CONFIG_SOCK_CGROUP_DATA + nla_total_size_64bit(sizeof(u64)) /* INET_DIAG_CGROUP_ID */ #endif + + nla_total_size(sizeof(struct inet_diag_sockopt)) + /* INET_DIAG_SOCKOPT */ ; } int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, -- cgit v1.2.3 From fd5a13f4893c8df2a5a3af8599adecb52d05fe89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:31 +0200 Subject: proc: add a read_iter method to proc proc_ops This will allow proc files to implement iter read semantics. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/proc_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2df965cd0974..270cab43ca3d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -30,6 +30,7 @@ struct proc_ops { unsigned int proc_flags; int (*proc_open)(struct inode *, struct file *); ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *); + ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *); ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *); loff_t (*proc_lseek)(struct file *, loff_t, int); int (*proc_release)(struct inode *, struct file *); -- cgit v1.2.3 From aae4c8e27bd7567132bb931488e2faf1a57c66e9 Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Mon, 17 Aug 2020 22:00:49 +0100 Subject: iommu: Rename iommu_tlb_* functions to iommu_iotlb_* To keep naming consistent we should stick with *iotlb*. This patch renames a few remaining functions. Signed-off-by: Tom Murphy Link: https://lore.kernel.org/r/20200817210051.13546-1-murphyt7@tcd.ie Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 2 +- include/linux/iommu.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 23285ba645db..4cde111e425b 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -31,7 +31,7 @@ enum io_pgtable_fmt { * single page. IOMMUs that cannot batch TLB invalidation * operations efficiently will typically issue them here, but * others may decide to update the iommu_iotlb_gather structure - * and defer the invalidation until iommu_tlb_sync() instead. + * and defer the invalidation until iommu_iotlb_sync() instead. * * Note that these can all be called in atomic context and must therefore * not block. diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fee209efb756..2ad26d8b4ab9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -514,13 +514,13 @@ extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); -static inline void iommu_flush_tlb_all(struct iommu_domain *domain) +static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) { if (domain->ops->flush_iotlb_all) domain->ops->flush_iotlb_all(domain); } -static inline void iommu_tlb_sync(struct iommu_domain *domain, +static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { if (domain->ops->iotlb_sync) @@ -543,7 +543,7 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, if (gather->pgsize != size || end < gather->start || start > gather->end) { if (gather->pgsize) - iommu_tlb_sync(domain, gather); + iommu_iotlb_sync(domain, gather); gather->pgsize = size; } @@ -725,11 +725,11 @@ static inline size_t iommu_map_sg_atomic(struct iommu_domain *domain, return 0; } -static inline void iommu_flush_tlb_all(struct iommu_domain *domain) +static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) { } -static inline void iommu_tlb_sync(struct iommu_domain *domain, +static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { } -- cgit v1.2.3 From 25e804926da39f1de7ae486920bfe65b099195f1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 3 Sep 2020 17:14:56 +0530 Subject: soundwire: define and use addr bit masks Soundwire addr is a 52bit value encoding link, version, unique id, mfg id, part id and class id. Define bit masks for these and use FIELD_GET() to extract these fields. Signed-off-by: Vinod Koul Tested-by: Bard Liao Link: https://lore.kernel.org/r/20200903114504.1202143-2-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 78f52cdeb2c9..1e9010c139f0 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -5,6 +5,7 @@ #define __SOUNDWIRE_H #include +#include struct sdw_bus; struct sdw_slave; @@ -456,13 +457,19 @@ struct sdw_slave_id { * * The MIPI DisCo for SoundWire defines in addition the link_id as bits 51:48 */ - -#define SDW_DISCO_LINK_ID(adr) (((adr) >> 48) & GENMASK(3, 0)) -#define SDW_VERSION(adr) (((adr) >> 44) & GENMASK(3, 0)) -#define SDW_UNIQUE_ID(adr) (((adr) >> 40) & GENMASK(3, 0)) -#define SDW_MFG_ID(adr) (((adr) >> 24) & GENMASK(15, 0)) -#define SDW_PART_ID(adr) (((adr) >> 8) & GENMASK(15, 0)) -#define SDW_CLASS_ID(adr) ((adr) & GENMASK(7, 0)) +#define SDW_DISCO_LINK_ID_MASK GENMASK_ULL(51, 48) +#define SDW_VERSION_MASK GENMASK_ULL(47, 44) +#define SDW_UNIQUE_ID_MASK GENMASK_ULL(43, 40) +#define SDW_MFG_ID_MASK GENMASK_ULL(39, 24) +#define SDW_PART_ID_MASK GENMASK_ULL(23, 8) +#define SDW_CLASS_ID_MASK GENMASK_ULL(7, 0) + +#define SDW_DISCO_LINK_ID(addr) FIELD_GET(SDW_DISCO_LINK_ID_MASK, addr) +#define SDW_VERSION(addr) FIELD_GET(SDW_VERSION_MASK, addr) +#define SDW_UNIQUE_ID(addr) FIELD_GET(SDW_UNIQUE_ID_MASK, addr) +#define SDW_MFG_ID(addr) FIELD_GET(SDW_MFG_ID_MASK, addr) +#define SDW_PART_ID(addr) FIELD_GET(SDW_PART_ID_MASK, addr) +#define SDW_CLASS_ID(addr) FIELD_GET(SDW_CLASS_ID_MASK, addr) /** * struct sdw_slave_intr_status - Slave interrupt status -- cgit v1.2.3 From 8be2f84acf2317068c93fea291d823ac30d172d6 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 3 Sep 2020 17:15:04 +0530 Subject: soundwire: remove SDW_REG_SHIFT() soundwire had defined SDW_REG_SHIFT to calculate shift values for bitmasks, but now that we have better things in bitfield.h, remove this. Signed-off-by: Vinod Koul Tested-by: Bard Liao Link: https://lore.kernel.org/r/20200903114504.1202143-10-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_registers.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index 5d3c271af7d1..f420e8059779 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -4,13 +4,6 @@ #ifndef __SDW_REGISTERS_H #define __SDW_REGISTERS_H -/* - * typically we define register and shifts but if one observes carefully, - * the shift can be generated from MASKS using few bit primitaives like ffs - * etc, so we use that and avoid defining shifts - */ -#define SDW_REG_SHIFT(n) (ffs(n) - 1) - /* * SDW registers as defined by MIPI 1.2 Spec */ -- cgit v1.2.3 From c7eb900f5f45eeab1ea1bed997a2a12d8b5907bc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 28 Aug 2020 19:12:11 +0300 Subject: iommu/vt-d: Move intel_iommu_gfx_mapped to Intel IOMMU header Static analyzer is not happy about intel_iommu_gfx_mapped declaration: .../drivers/iommu/intel/iommu.c:364:5: warning: symbol 'intel_iommu_gfx_mapped' was not declared. Should it be static? Move its declaration to Intel IOMMU header file. Signed-off-by: Andy Shevchenko Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20200828161212.71294-1-andriy.shevchenko@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b1ed2f25f7c0..6a3ddaabf3f5 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -792,6 +792,7 @@ extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; extern int intel_iommu_tboot_noforce; +extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { -- cgit v1.2.3 From c2c59456e1fcad3f464761c5839399176e3a934a Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 4 Sep 2020 18:40:38 +0800 Subject: iommu/mediatek: Check 4GB mode by reading infracfg In previous discussion [1] and [2], we found that it is risky to use max_pfn or totalram_pages to tell if 4GB mode is enabled. Check 4GB mode by reading infracfg register, remove the usage of the un-exported symbol max_pfn. This is a step towards building mtk_iommu as a kernel module. [1] https://lore.kernel.org/lkml/20200603161132.2441-1-miles.chen@mediatek.com/ [2] https://lore.kernel.org/lkml/20200604080120.2628-1-miles.chen@mediatek.com/ [3] https://lore.kernel.org/lkml/20200715205120.GA778876@bogus/ Cc: Mike Rapoport Cc: David Hildenbrand Cc: Yong Wu Cc: Yingjoe Chen Cc: Christoph Hellwig Cc: Rob Herring Cc: Matthias Brugger Cc: Joerg Roedel Reviewed-by: Matthias Brugger Signed-off-by: Miles Chen Link: https://lore.kernel.org/r/20200904104038.4979-1-miles.chen@mediatek.com Signed-off-by: Joerg Roedel --- include/linux/soc/mediatek/infracfg.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index fd25f0148566..233463d789c6 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -32,6 +32,9 @@ #define MT7622_TOP_AXI_PROT_EN_WB (BIT(2) | BIT(6) | \ BIT(7) | BIT(8)) +#define REG_INFRA_MISC 0xf00 +#define F_DDR_4GB_SUPPORT_EN BIT(13) + int mtk_infracfg_set_bus_protection(struct regmap *infracfg, u32 mask, bool reg_update); int mtk_infracfg_clear_bus_protection(struct regmap *infracfg, u32 mask, -- cgit v1.2.3 From 4beba9486abd2f86d125271d6946f7c38ed0fe77 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 22 Apr 2020 15:25:27 +0100 Subject: mm: Add PG_arch_2 page flag For arm64 MTE support it is necessary to be able to mark pages that contain user space visible tags that will need to be saved/restored e.g. when swapped out. To support this add a new arch specific flag (PG_arch_2). This flag is only available on 64-bit architectures due to the limited number of spare page flags on the 32-bit ones. Signed-off-by: Steven Price [catalin.marinas@arm.com: use CONFIG_64BIT for guarding this new flag] Signed-off-by: Catalin Marinas Cc: Andrew Morton --- include/linux/kernel-page-flags.h | 1 + include/linux/page-flags.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index abd20ef93c98..eee1877a354e 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -17,5 +17,6 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6be1aa559b1e..276140c94f4a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -135,6 +135,9 @@ enum pageflags { #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) PG_young, PG_idle, +#endif +#ifdef CONFIG_64BIT + PG_arch_2, #endif __NR_PAGEFLAGS, -- cgit v1.2.3 From b3fbbea4c00220f62e6f7e2514466e6ee81f7f60 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Mon, 25 Nov 2019 17:27:06 +0000 Subject: mm: Introduce arch_calc_vm_flag_bits() Similarly to arch_calc_vm_prot_bits(), introduce a dummy arch_calc_vm_flag_bits() invoked from calc_vm_flag_bits(). This macro can be overridden by architectures to insert specific VM_* flags derived from the mmap() MAP_* flags. Signed-off-by: Kevin Brodsky Signed-off-by: Catalin Marinas Cc: Andrew Morton --- include/linux/mman.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index 6f34c33075f9..6fa15c9b12af 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -78,13 +78,18 @@ static inline void vm_unacct_memory(long pages) } /* - * Allow architectures to handle additional protection bits + * Allow architectures to handle additional protection and flag bits. The + * overriding macros must be defined in the arch-specific asm/mman.h file. */ #ifndef arch_calc_vm_prot_bits #define arch_calc_vm_prot_bits(prot, pkey) 0 #endif +#ifndef arch_calc_vm_flag_bits +#define arch_calc_vm_flag_bits(flags) 0 +#endif + #ifndef arch_vm_get_page_prot #define arch_vm_get_page_prot(vm_flags) __pgprot(0) #endif @@ -135,7 +140,8 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | - _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ); + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); -- cgit v1.2.3 From 9f3419315f3cdc41a7318e4d50ba18a592b30c8c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Nov 2019 10:00:27 +0000 Subject: arm64: mte: Add PROT_MTE support to mmap() and mprotect() To enable tagging on a memory range, the user must explicitly opt in via a new PROT_MTE flag passed to mmap() or mprotect(). Since this is a new memory type in the AttrIndx field of a pte, simplify the or'ing of these bits over the protection_map[] attributes by making MT_NORMAL index 0. There are two conditions for arch_vm_get_page_prot() to return the MT_NORMAL_TAGGED memory type: (1) the user requested it via PROT_MTE, registered as VM_MTE in the vm_flags, and (2) the vma supports MTE, decided during the mmap() call (only) and registered as VM_MTE_ALLOWED. arch_calc_vm_prot_bits() is responsible for registering the user request as VM_MTE. The newly introduced arch_calc_vm_flag_bits() sets VM_MTE_ALLOWED if the mapping is MAP_ANONYMOUS. An MTE-capable filesystem (RAM-based) may be able to set VM_MTE_ALLOWED during its mmap() file ops call. In addition, update VM_DATA_DEFAULT_FLAGS to allow mprotect(PROT_MTE) on stack or brk area. The Linux mmap() syscall currently ignores unknown PROT_* flags. In the presence of MTE, an mmap(PROT_MTE) on a file which does not support MTE will not report an error and the memory will not be mapped as Normal Tagged. For consistency, mprotect(PROT_MTE) will not report an error either if the memory range does not support MTE. Two subsequent patches in the series will propose tightening of this behaviour. Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Catalin Marinas Cc: Will Deacon --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ca6e6a81576b..4312c6c808e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -340,6 +340,14 @@ extern unsigned int kobjsize(const void *objp); # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif +#if defined(CONFIG_ARM64_MTE) +# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ +# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ +#else +# define VM_MTE VM_NONE +# define VM_MTE_ALLOWED VM_NONE +#endif + #ifndef VM_GROWSUP # define VM_GROWSUP VM_NONE #endif -- cgit v1.2.3 From c462ac288f2c97e0c1d9ff6a65955317e799f958 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 25 Nov 2019 17:27:06 +0000 Subject: mm: Introduce arch_validate_flags() Similarly to arch_validate_prot() called from do_mprotect_pkey(), an architecture may need to sanity-check the new vm_flags. Define a dummy function always returning true. In addition to do_mprotect_pkey(), also invoke it from mmap_region() prior to updating vma->vm_page_prot to allow the architecture code to veto potentially inconsistent vm_flags. Signed-off-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/mman.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index 6fa15c9b12af..629cefc4ecba 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -108,6 +108,19 @@ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) #define arch_validate_prot arch_validate_prot #endif +#ifndef arch_validate_flags +/* + * This is called from mmap() and mprotect() with the updated vma->vm_flags. + * + * Returns true if the VM_* flags are valid. + */ +static inline bool arch_validate_flags(unsigned long flags) +{ + return true; +} +#define arch_validate_flags arch_validate_flags +#endif + /* * Optimisation macro. It is equivalent to: * (x & bit1) ? bit2 : 0 -- cgit v1.2.3 From 8a84802e2a2b1a682761a31c2685506b9f4e1840 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 13 May 2020 16:37:49 +0100 Subject: mm: Add arch hooks for saving/restoring tags Arm's Memory Tagging Extension (MTE) adds some metadata (tags) to every physical page, when swapping pages out to disk it is necessary to save these tags, and later restore them when reading the pages back. Add some hooks along with dummy implementations to enable the arch code to handle this. Three new hooks are added to the swap code: * arch_prepare_to_swap() and * arch_swap_invalidate_page() / arch_swap_invalidate_area(). One new hook is added to shmem: * arch_swap_restore() Signed-off-by: Steven Price [catalin.marinas@arm.com: add unlock_page() on the error path] [catalin.marinas@arm.com: dropped the _tags suffix] Signed-off-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/pgtable.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e8cbc2e795d5..dc3b74129fbc 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -633,6 +633,34 @@ static inline int arch_unmap_one(struct mm_struct *mm, } #endif +/* + * Allow architectures to preserve additional metadata associated with + * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function + * prototypes must be defined in the arch-specific asm/pgtable.h file. + */ +#ifndef __HAVE_ARCH_PREPARE_TO_SWAP +static inline int arch_prepare_to_swap(struct page *page) +{ + return 0; +} +#endif + +#ifndef __HAVE_ARCH_SWAP_INVALIDATE +static inline void arch_swap_invalidate_page(int type, pgoff_t offset) +{ +} + +static inline void arch_swap_invalidate_area(int type) +{ +} +#endif + +#ifndef __HAVE_ARCH_SWAP_RESTORE +static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +{ +} +#endif + #ifndef __HAVE_ARCH_PGD_OFFSET_GATE #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) #endif -- cgit v1.2.3 From e48c15b796d412ede883bb2ef7779b2a142f7962 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 29 Jun 2020 17:21:32 -0700 Subject: smp: Add source and destination CPUs to __call_single_data This commit adds a destination CPU to __call_single_data, and is inspired by an earlier commit by Peter Zijlstra. This version adds #ifdef to permit use by 32-bit systems and supplying the destination CPU for all smp_call_function*() requests, not just smp_call_function_single(). If need be, 32-bit systems could be accommodated by shrinking the flags field to 16 bits (the atomic_t variant is currently unused) and by providing only eight bits for CPU on such systems. It is not clear that the addition of the fields to __call_single_node are really needed. [ paulmck: Apply Boqun Feng feedback on 32-bit builds. ] Link: https://lore.kernel.org/lkml/20200615164048.GC2531@hirez.programming.kicks-ass.net/ Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/smp.h | 3 +++ include/linux/smp_types.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 80d557ef8a11..9f13966d3d92 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -26,6 +26,9 @@ struct __call_single_data { struct { struct llist_node llist; unsigned int flags; +#ifdef CONFIG_64BIT + u16 src, dst; +#endif }; }; smp_call_func_t func; diff --git a/include/linux/smp_types.h b/include/linux/smp_types.h index 364b3ae3e41d..2e8461af8df6 100644 --- a/include/linux/smp_types.h +++ b/include/linux/smp_types.h @@ -61,6 +61,9 @@ struct __call_single_node { unsigned int u_flags; atomic_t a_flags; }; +#ifdef CONFIG_64BIT + u16 src, dst; +#endif }; #endif /* __LINUX_SMP_TYPES_H */ -- cgit v1.2.3 From 0f7c5317b89058e432d3e97efa19467ff4c3b86b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 4 Sep 2020 14:37:29 -0700 Subject: of: Export of_remove_property() to modules We will need to remove some OF properties in drivers/net/dsa/bcm_sf2.c with a subsequent commit. Export of_remove_property() to modules so we can keep bcm_sf2 modular and provide an empty stub for when CONFIG_OF is disabled to maintain the ability to compile test. Signed-off-by: Florian Fainelli Acked-by: Rob Herring Signed-off-by: Jakub Kicinski --- include/linux/of.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 5cf7ae0465d1..481ec0467285 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -929,6 +929,11 @@ static inline int of_machine_is_compatible(const char *compat) return 0; } +static inline int of_remove_property(struct device_node *np, struct property *prop) +{ + return 0; +} + static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { return false; -- cgit v1.2.3 From 78366e9cbd7892ac3d321e62c4be7a7fe79a69ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 27 Jul 2020 15:38:35 +0200 Subject: mmc: sdio: Parse CISTPL_VERS_1 major and minor revision numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They should indicate compliance of standard. Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20200727133837.19086-3-pali@kernel.org Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 2 ++ include/linux/mmc/sdio_func.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 7d46411ffaa2..42df06c6b19c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -297,6 +297,8 @@ struct mmc_card { struct sdio_cis cis; /* common tuple info */ struct sdio_func *sdio_func[SDIO_MAX_FUNCS]; /* SDIO functions (devices) */ struct sdio_func *sdio_single_irq; /* SDIO function when only one IRQ active */ + u8 major_rev; /* major revision number */ + u8 minor_rev; /* minor revision number */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ struct sdio_func_tuple *tuples; /* unknown common tuples */ diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index fa2aaab5e57a..478855b8e406 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -51,6 +51,8 @@ struct sdio_func { u8 *tmpbuf; /* DMA:able scratch buffer */ + u8 major_rev; /* major revision number */ + u8 minor_rev; /* minor revision number */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ -- cgit v1.2.3 From 6932794192f63c5e105eade19fba26b8ef29ec79 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 21 Aug 2020 08:35:33 +0200 Subject: mmc: core: Improve documentation of MMC_CAP_HW_RESET MMC_CAP_HW_RESET means that the controller is capable of resetting the eMMC device via RST_n signal, not a reset of the controller. Two drivers got this wrong, so let's make it more clear. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20200821063533.3771-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index c5b6e97cb21a..799e23b0a23c 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -163,6 +163,7 @@ struct mmc_host_ops { int (*select_drive_strength)(struct mmc_card *card, unsigned int max_dtr, int host_drv, int card_drv, int *drv_type); + /* Reset the eMMC card via RST_n */ void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); @@ -346,7 +347,7 @@ struct mmc_host { #define MMC_CAP_CD_WAKE (1 << 28) /* Enable card detect wake */ #define MMC_CAP_CMD_DURING_TFR (1 << 29) /* Commands during data transfer */ #define MMC_CAP_CMD23 (1 << 30) /* CMD23 supported. */ -#define MMC_CAP_HW_RESET (1 << 31) /* Hardware reset */ +#define MMC_CAP_HW_RESET (1 << 31) /* Reset the eMMC card via RST_n */ u32 caps2; /* More host capabilities */ -- cgit v1.2.3 From 97a7d87e96b02fc5b3944d7735e0f6b8446d07da Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 1 Sep 2020 17:02:48 +0200 Subject: mmc: core: add a 'doing_init_tune' flag and a 'mmc_doing_tune' helper Our driver needs to know when tuning is in progress. 'doing_retune' only covers re-tuning, not the initial tuning. Add another flag to detect the initial tuning state and add a helper which tells us if any kind of tuning is going on. Only implemented for MMC currently because that's where we need it. SD can be added later if it becomes necessary. Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20200901150250.26236-3-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 799e23b0a23c..c079b932330f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -400,6 +400,7 @@ struct mmc_host { unsigned int use_spi_crc:1; unsigned int claimed:1; /* host exclusively claimed */ unsigned int bus_dead:1; /* bus has been released */ + unsigned int doing_init_tune:1; /* initial tuning in progress */ unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ unsigned int retune_now:1; /* do re-tuning at next req */ @@ -595,6 +596,11 @@ static inline bool mmc_doing_retune(struct mmc_host *host) return host->doing_retune == 1; } +static inline bool mmc_doing_tune(struct mmc_host *host) +{ + return host->doing_retune == 1 || host->doing_init_tune == 1; +} + static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data) { return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE; -- cgit v1.2.3 From 180c284ce4d66d2fb386b81bea59f01bc7be150a Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 4 Sep 2020 15:51:20 +0300 Subject: device connection: Remove device_connection_find() There are no users for that function. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200904125123.83725-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ca18da4768e3..0704461e8aad 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -319,8 +319,6 @@ void *fwnode_connection_find_match(struct fwnode_handle *fwnode, void *device_connection_find_match(struct device *dev, const char *con_id, void *data, devcon_match_fn_t match); -struct device *device_connection_find(struct device *dev, const char *con_id); - void device_connection_add(struct device_connection *con); void device_connection_remove(struct device_connection *con); -- cgit v1.2.3 From 87ea5926247f7e15f0b5bc5b36cb210536177d77 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 4 Sep 2020 15:51:21 +0300 Subject: device connection: Remove device_connection_add() All the users of that API have now been converted to use software fwnodes instead. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200904125123.83725-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0704461e8aad..ea37debb0a98 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -297,7 +297,6 @@ struct device_dma_parameters { * @fwnode: The device node of the connected device * @endpoint: The names of the two devices connected together * @id: Unique identifier for the connection - * @list: List head, private, for internal use only * * NOTE: @fwnode is not used together with @endpoint. @fwnode is used when * platform firmware defines the connection. When the connection is registered @@ -307,7 +306,6 @@ struct device_connection { struct fwnode_handle *fwnode; const char *endpoint[2]; const char *id; - struct list_head list; }; typedef void *(*devcon_match_fn_t)(struct device_connection *con, int ep, @@ -319,33 +317,6 @@ void *fwnode_connection_find_match(struct fwnode_handle *fwnode, void *device_connection_find_match(struct device *dev, const char *con_id, void *data, devcon_match_fn_t match); -void device_connection_add(struct device_connection *con); -void device_connection_remove(struct device_connection *con); - -/** - * device_connections_add - Add multiple device connections at once - * @cons: Zero terminated array of device connection descriptors - */ -static inline void device_connections_add(struct device_connection *cons) -{ - struct device_connection *c; - - for (c = cons; c->endpoint[0]; c++) - device_connection_add(c); -} - -/** - * device_connections_remove - Remove multiple device connections at once - * @cons: Zero terminated array of device connection descriptors - */ -static inline void device_connections_remove(struct device_connection *cons) -{ - struct device_connection *c; - - for (c = cons; c->endpoint[0]; c++) - device_connection_remove(c); -} - /** * enum device_link_state - Device link states. * @DL_STATE_NONE: The presence of the drivers is not being tracked. -- cgit v1.2.3 From f5514c91e9f72b719bfec64af6acac5ad41df7b5 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 4 Sep 2020 15:51:22 +0300 Subject: device connection: Remove struct device_connection Since the connection descriptors can't be stored into the list anymore, there is no need for the data structure. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200904125123.83725-4-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ea37debb0a98..d4612efaab82 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -292,23 +292,7 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; -/** - * struct device_connection - Device Connection Descriptor - * @fwnode: The device node of the connected device - * @endpoint: The names of the two devices connected together - * @id: Unique identifier for the connection - * - * NOTE: @fwnode is not used together with @endpoint. @fwnode is used when - * platform firmware defines the connection. When the connection is registered - * with device_connection_add() @endpoint is used instead. - */ -struct device_connection { - struct fwnode_handle *fwnode; - const char *endpoint[2]; - const char *id; -}; - -typedef void *(*devcon_match_fn_t)(struct device_connection *con, int ep, +typedef void *(*devcon_match_fn_t)(struct fwnode_handle *fwnode, const char *id, void *data); void *fwnode_connection_find_match(struct fwnode_handle *fwnode, -- cgit v1.2.3 From 07e292950b9368518c659c4d5f1dca4bf55779bd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 20 Aug 2020 21:53:41 -0600 Subject: PCI: Allow root and child buses to have different pci_ops PCI host bridges often have different ways to access the root and child bus config spaces. The host bridge drivers have invented their own abstractions to handle this. Let's support having different root and child bus pci_ops so these per driver abstractions can be removed. Link: https://lore.kernel.org/r/20200821035420.380495-2-robh@kernel.org Signed-off-by: Rob Herring Signed-off-by: Lorenzo Pieralisi Cc: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 835530605c0d..1fbe95a7d386 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -523,6 +523,7 @@ struct pci_host_bridge { struct device dev; struct pci_bus *bus; /* Root bus */ struct pci_ops *ops; + struct pci_ops *child_ops; void *sysdata; int busnr; struct list_head windows; /* resource_entry */ -- cgit v1.2.3 From e1f82a0dcf388d98bcc7ad195c03bd812405e6b2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Aug 2020 13:44:59 +0300 Subject: driver core: Annotate dev_err_probe() with __must_check We have got already new users of this API which interpret it differently and miss the opportunity to optimize their code. In order to avoid similar cases in the future, annotate dev_err_probe() with __must_check. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200826104459.81979-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index d4612efaab82..0b3dc72f64b2 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -931,7 +931,7 @@ void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); extern __printf(3, 4) -int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +int __must_check dev_err_probe(const struct device *dev, int err, const char *fmt, ...); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ -- cgit v1.2.3 From 82894c1d397f16c2208a35dbb1310559f31980bb Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Mon, 7 Sep 2020 01:04:51 +0200 Subject: firmware: arm_scmi: Constify ops pointers in scmi_handle These are never modified, so make them const to allow drivers to make them const. Link: https://lore.kernel.org/r/20200906230452.33410-3-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 7e5dd7d1e221..05570afc7f74 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -279,12 +279,12 @@ struct scmi_notify_ops { struct scmi_handle { struct device *dev; struct scmi_revision_info *version; - struct scmi_perf_ops *perf_ops; - struct scmi_clk_ops *clk_ops; - struct scmi_power_ops *power_ops; - struct scmi_sensor_ops *sensor_ops; - struct scmi_reset_ops *reset_ops; - struct scmi_notify_ops *notify_ops; + const struct scmi_perf_ops *perf_ops; + const struct scmi_clk_ops *clk_ops; + const struct scmi_power_ops *power_ops; + const struct scmi_sensor_ops *sensor_ops; + const struct scmi_reset_ops *reset_ops; + const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ void *perf_priv; void *clk_priv; -- cgit v1.2.3 From 8b10fe68985278de4926daa56ad6af701839e40a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Aug 2020 10:21:39 -0400 Subject: fscrypt: drop unused inode argument from fscrypt_fname_alloc_buffer Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20200810142139.487631-1-jlayton@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 991ff8575d0e..eaf16eb55788 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -197,7 +197,7 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) kfree(fname->crypto_buf.name); } -int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 max_encrypted_len, +int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str); void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str); int fscrypt_fname_disk_to_usr(const struct inode *inode, @@ -428,8 +428,7 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) return; } -static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 max_encrypted_len, +static inline int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; -- cgit v1.2.3 From 384d87ef2c954fc58e6c5fd8253e4a1984f5fe02 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Sep 2020 10:58:52 +0200 Subject: block: Do not discard buffers under a mounted filesystem Discarding blocks and buffers under a mounted filesystem is hardly anything admin wants to do. Usually it will confuse the filesystem and sometimes the loss of buffer_head state (including b_private field) can even cause crashes like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G O --------- - - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1 Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015 RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2] ... Call Trace: __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2] jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2] kjournald2+0xbd/0x270 [jbd2] So if we don't have block device open with O_EXCL already, claim the block device while we truncate buffer cache. This makes sure any exclusive block device user (such as filesystem) cannot operate on the device while we are discarding buffer cache. Reported-by: Ye Bin Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig [axboe: fix !CONFIG_BLOCK error in truncate_bdev_range()] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d82959e7b86..37ec5a73d027 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1987,11 +1987,18 @@ void bdput(struct block_device *); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); +int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, + loff_t lend); int sync_blockdev(struct block_device *bdev); #else static inline void invalidate_bdev(struct block_device *bdev) { } +static inline int truncate_bdev_range(struct block_device *bdev, fmode_t mode, + loff_t lstart, loff_t lend) +{ + return 0; +} static inline int sync_blockdev(struct block_device *bdev) { return 0; -- cgit v1.2.3 From 66ada2ccae4ed4dd07ba91df3b5fdb4c11335bd1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 29 Aug 2020 22:00:01 +0900 Subject: kprobes: Add generic kretprobe trampoline handler Add a generic kretprobe trampoline handler for unifying the all cloned /arch/* kretprobe trampoline handlers. The generic kretprobe trampoline handler is based on the x86 implementation, because it is the latest implementation. It has frame pointer checking, kprobe_busy_begin/end and return address fixup for user handlers. [ mingo: Minor edits. ] Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870600138.1229682.3424065380448088833.stgit@devnote2 --- include/linux/kprobes.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 9be1bff4f586..72142ae5df3e 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -187,10 +187,38 @@ static inline int kprobes_built_in(void) return 1; } +extern struct kprobe kprobe_busy; +extern void kprobe_busy_begin(void); +extern void kprobe_busy_end(void); + #ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); + +/* If the trampoline handler called from a kprobe, use this version */ +unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, + void *trampoline_address, + void *frame_pointer); + +static nokprobe_inline +unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, + void *trampoline_address, + void *frame_pointer) +{ + unsigned long ret; + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never runs in kprobe handler, no kprobe must + * be running at this point. + */ + kprobe_busy_begin(); + ret = __kretprobe_trampoline_handler(regs, trampoline_address, frame_pointer); + kprobe_busy_end(); + + return ret; +} + #else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) @@ -354,10 +382,6 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } -extern struct kprobe kprobe_busy; -void kprobe_busy_begin(void); -void kprobe_busy_end(void); - kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); -- cgit v1.2.3 From b338817807538c893540e393856b79cbbdf777ea Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 29 Aug 2020 22:02:47 +0900 Subject: kprobes: Free kretprobe_instance with RCU callback Free kretprobe_instance with RCU callback instead of directly freeing the object in the kretprobe handler context. This will make kretprobe run safer in NMI context. Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870616685.1229682.11978742048709542226.stgit@devnote2 --- include/linux/kprobes.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 72142ae5df3e..3389067d88b1 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -156,7 +156,10 @@ struct kretprobe { }; struct kretprobe_instance { - struct hlist_node hlist; + union { + struct hlist_node hlist; + struct rcu_head rcu; + }; struct kretprobe *rp; kprobe_opcode_t *ret_addr; struct task_struct *task; @@ -395,7 +398,6 @@ int register_kretprobes(struct kretprobe **rps, int num); void unregister_kretprobes(struct kretprobe **rps, int num); void kprobe_flush_task(struct task_struct *tk); -void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); -- cgit v1.2.3 From 319f0ce284fff8e4f95167cb144acc905d0584c7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 29 Aug 2020 22:03:02 +0900 Subject: kprobes: Make local functions static Since we unified the kretprobe trampoline handler from arch/* code, some functions and objects do not need to be exported anymore. Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870618256.1229682.8692046612635810882.stgit@devnote2 --- include/linux/kprobes.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 3389067d88b1..5c8c271fa1e9 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -190,7 +190,6 @@ static inline int kprobes_built_in(void) return 1; } -extern struct kprobe kprobe_busy; extern void kprobe_busy_begin(void); extern void kprobe_busy_end(void); @@ -235,16 +234,6 @@ static inline int arch_trampoline_kprobe(struct kprobe *p) extern struct kretprobe_blackpoint kretprobe_blacklist[]; -static inline void kretprobe_assert(struct kretprobe_instance *ri, - unsigned long orig_ret_address, unsigned long trampoline_address) -{ - if (!orig_ret_address || (orig_ret_address == trampoline_address)) { - printk("kretprobe BUG!: Processing kretprobe %p @ %p\n", - ri->rp, ri->rp->kp.addr); - BUG(); - } -} - #ifdef CONFIG_KPROBES_SANITY_TEST extern int init_test_probes(void); #else @@ -364,10 +353,6 @@ int arch_check_ftrace_location(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); -void kretprobe_hash_lock(struct task_struct *tsk, - struct hlist_head **head, unsigned long *flags); -void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags); -struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); /* kprobe_running() will just return the current_kprobe on this CPU */ static inline struct kprobe *kprobe_running(void) -- cgit v1.2.3 From d7cf5590393132993f2e6d2618fd23a20a6342ca Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 7 Sep 2020 15:05:31 +0300 Subject: device property: Move fwnode_connection_find_match() under drivers/base/property.c The function is now only a helper that searches the connection from device graph and then by checking if the supplied connection identifier matches a property that contains reference. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200907120532.37611-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 9 --------- include/linux/property.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0b3dc72f64b2..bf480dbe3375 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -292,15 +292,6 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; -typedef void *(*devcon_match_fn_t)(struct fwnode_handle *fwnode, const char *id, - void *data); - -void *fwnode_connection_find_match(struct fwnode_handle *fwnode, - const char *con_id, void *data, - devcon_match_fn_t match); -void *device_connection_find_match(struct device *dev, const char *con_id, - void *data, devcon_match_fn_t match); - /** * enum device_link_state - Device link states. * @DL_STATE_NONE: The presence of the drivers is not being tracked. diff --git a/include/linux/property.h b/include/linux/property.h index 9f805c442819..aedae94dcf41 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -418,6 +418,20 @@ fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); +typedef void *(*devcon_match_fn_t)(struct fwnode_handle *fwnode, const char *id, + void *data); + +void *fwnode_connection_find_match(struct fwnode_handle *fwnode, + const char *con_id, void *data, + devcon_match_fn_t match); + +static inline void *device_connection_find_match(struct device *dev, + const char *con_id, void *data, + devcon_match_fn_t match) +{ + return fwnode_connection_find_match(dev_fwnode(dev), con_id, data, match); +} + /* -------------------------------------------------------------------------- */ /* Software fwnode support - when HW description is incomplete or missing */ -- cgit v1.2.3 From 18efb2f9e897ac65e7a1b2892f4a53e404534eba Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 29 Jul 2020 10:58:29 -0700 Subject: test_firmware: Test platform fw loading on non-EFI systems On non-EFI systems, it wasn't possible to test the platform firmware loader because it will have never set "checked_fw" during __init. Instead, allow the test code to override this check. Additionally split the declarations into a private header file so it there is greater enforcement of the symbol visibility. Fixes: 548193cba2a7 ("test_firmware: add support for firmware_request_platform") Cc: stable@vger.kernel.org Reviewed-by: Luis Chamberlain Acked-by: Scott Branden Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20200729175845.1745471-2-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/efi_embedded_fw.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h index 57eac5241303..4ad5db9f5312 100644 --- a/include/linux/efi_embedded_fw.h +++ b/include/linux/efi_embedded_fw.h @@ -7,19 +7,6 @@ #define EFI_EMBEDDED_FW_PREFIX_LEN 8 -/* - * This struct and efi_embedded_fw_list are private to the efi-embedded fw - * implementation they are in this header for use by lib/test_firmware.c only! - */ -struct efi_embedded_fw { - struct list_head list; - const char *name; - const u8 *data; - size_t length; -}; - -extern struct list_head efi_embedded_fw_list; - /** * struct efi_embedded_fw_desc - This struct is used by the EFI embedded-fw * code to search for embedded firmwares. -- cgit v1.2.3 From f82485722e5de5ebb08d3a1dd7302203346dbff9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 24 Aug 2020 19:38:57 +0200 Subject: devres: provide devm_krealloc() Implement the managed variant of krealloc(). This function works with all memory allocated by devm_kmalloc() (or devres functions using it implicitly like devm_kmemdup(), devm_kstrdup() etc.). Managed realloc'ed chunks can be manually released with devm_kfree(). Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200824173859.4910-2-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index bf480dbe3375..85d5c28bed93 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -206,6 +206,8 @@ int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc; +void *devm_krealloc(struct device *dev, void *ptr, size_t size, + gfp_t gfp) __must_check; __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) __malloc; __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, -- cgit v1.2.3 From ab5fe88aef4b4837bc2f92fc9f6f9c9b75ef633b Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Thu, 20 Aug 2020 00:26:45 +0530 Subject: fbdev: via-core: use generic power management Drivers should do only device-specific jobs. But in general, drivers using legacy PCI PM framework for .suspend()/.resume() have to manage many PCI PM-related tasks themselves which can be done by PCI Core itself. This brings extra load on the driver and it directly calls PCI helper functions to handle them. Switch to the new generic framework by updating function signatures and define a "struct dev_pm_ops" variable to bind PM callbacks. The via_suspend() is designed to function only in the case of Suspend. Thus, the code checked for "if (state.event != PM_EVENT_SUSPEND)". This is because, in the legacy framework, this callback was invoked even in the event of Freeze and Hibernate. Hence, added the load of unnecessary function-calls. The goal can be achieved by binding the callback with only "via_pm_ops.suspend" in the new framework. This also avoids the step of checking "if (state.event != PM_EVENT_SUSPEND)" every time the callback is invoked. Signed-off-by: Vaibhav Gupta Cc: Bjorn Helgaas Cc: Bjorn Helgaas Cc: Bjorn Helgaas Cc: Vaibhav Gupta Cc: Sam Ravnborg Cc: Paul Mackerras Cc: Russell King Cc: Andres Salomon CC: Antonino Daplas Cc: Shuah Khan Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/20200819185654.151170-4-vaibhavgupta40@gmail.com --- include/linux/via-core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/via-core.h b/include/linux/via-core.h index 9e802deedb2d..8737599b9148 100644 --- a/include/linux/via-core.h +++ b/include/linux/via-core.h @@ -47,7 +47,6 @@ struct via_port_cfg { /* * Allow subdevs to register suspend/resume hooks. */ -#ifdef CONFIG_PM struct viafb_pm_hooks { struct list_head list; int (*suspend)(void *private); @@ -57,7 +56,6 @@ struct viafb_pm_hooks { void viafb_pm_register(struct viafb_pm_hooks *hooks); void viafb_pm_unregister(struct viafb_pm_hooks *hooks); -#endif /* CONFIG_PM */ /* * This is the global viafb "device" containing stuff needed by -- cgit v1.2.3 From e2028c8e6bf9a12dfe83fc12ce6d5d9ab1628b0b Mon Sep 17 00:00:00 2001 From: Sven Schneider Date: Thu, 20 Aug 2020 10:21:37 +0200 Subject: lib/fonts: add font 6x8 for OLED display This font is derived from lib/fonts/font_6x10.c and is useful for small OLED displays Signed-off-by: Sven Schneider Signed-off-by: Sascha Hauer Reviewed-by: Greg Kroah-Hartman Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/20200820082137.5907-1-s.hauer@pengutronix.de --- include/linux/font.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/font.h b/include/linux/font.h index 51b91c8b69d5..4a3f8741bb7e 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -33,6 +33,7 @@ struct font_desc { #define MINI4x6_IDX 9 #define FONT6x10_IDX 10 #define TER16x32_IDX 11 +#define FONT6x8_IDX 12 extern const struct font_desc font_vga_8x8, font_vga_8x16, @@ -45,7 +46,8 @@ extern const struct font_desc font_vga_8x8, font_acorn_8x8, font_mini_4x6, font_6x10, - font_ter_16x32; + font_ter_16x32, + font_6x8; /* Find a font with a specific name */ -- cgit v1.2.3 From de394e7568ce2cdb4643ed230169f484f25f9442 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Tue, 8 Sep 2020 12:57:43 +0200 Subject: tracepoint: Fix overly long tracepoint names Stephen Rothwell reported: > Exported symbols need to be <= (64 - sizeof(Elf_Addr)) long. This is > presumably 56 on 64 bit arches and the above symbol (including the '.') > is 56 characters long. Shorten the tracepoint symbol name. Fixes: d25e37d89dd2 ("tracepoint: Optimize using static_call()") Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200908105743.GW2674@hirez.programming.kicks-ass.net --- include/linux/tracepoint.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3722a10fc46d..81fa0b2f271e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -154,7 +154,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #ifdef CONFIG_HAVE_STATIC_CALL #define __DO_TRACE_CALL(name) static_call(tp_func_##name) #else -#define __DO_TRACE_CALL(name) __tracepoint_iter_##name +#define __DO_TRACE_CALL(name) __traceiter_##name #endif /* CONFIG_HAVE_STATIC_CALL */ /* @@ -232,8 +232,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * poking RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ - extern int __tracepoint_iter_##name(data_proto); \ - DECLARE_STATIC_CALL(tp_func_##name, __tracepoint_iter_##name); \ + extern int __traceiter_##name(data_proto); \ + DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -288,19 +288,19 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static const char __tpstrtab_##_name[] \ __section(__tracepoints_strings) = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ - int __tracepoint_iter_##_name(void *__data, proto); \ + int __traceiter_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used \ __section(__tracepoints) = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_INIT_FALSE, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ - .iterator = &__tracepoint_iter_##_name, \ + .iterator = &__traceiter_##_name, \ .regfunc = _reg, \ .unregfunc = _unreg, \ .funcs = NULL }; \ __TRACEPOINT_ENTRY(_name); \ - int __tracepoint_iter_##_name(void *__data, proto) \ + int __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ @@ -314,18 +314,18 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } while ((++it_func_ptr)->func); \ return 0; \ } \ - DEFINE_STATIC_CALL(tp_func_##_name, __tracepoint_iter_##_name); + DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); #define DEFINE_TRACE(name, proto, args) \ DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name); \ - EXPORT_SYMBOL_GPL(__tracepoint_iter_##name); \ + EXPORT_SYMBOL_GPL(__traceiter_##name); \ EXPORT_STATIC_CALL_GPL(tp_func_##name) #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name); \ - EXPORT_SYMBOL(__tracepoint_iter_##name); \ + EXPORT_SYMBOL(__traceiter_##name); \ EXPORT_STATIC_CALL(tp_func_##name) -- cgit v1.2.3 From b41b0ce5982693e27307cfe0aaf49bc8e3a20900 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 29 Jul 2020 15:34:39 +0300 Subject: interconnect: Add bulk API helpers There are drivers which just need to get multiple interconnect paths, request some predefined amounts of bandwidth and then just toggle the paths between enabled/disabled state. The aim of this patch is simplify the above and to allow drivers to put all the path names and bandwidth data into a single static icc_bulk_data table and call the icc_bulk_* functions on that table in order to scale all the interconnect paths in parallel. Suggested-by: Evan Green Suggested-by: Bjorn Andersson Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20200729123439.9961-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- include/linux/interconnect.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index 3a63d98613fc..f2dd2fc8d3cd 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -23,6 +23,28 @@ struct icc_path; struct device; +/** + * struct icc_bulk_data - Data used for bulk icc operations. + * + * @path: reference to the interconnect path (internal use) + * @name: the name from the "interconnect-names" DT property + * @avg_bw: average bandwidth in icc units + * @peak_bw: peak bandwidth in icc units + */ +struct icc_bulk_data { + struct icc_path *path; + const char *name; + u32 avg_bw; + u32 peak_bw; +}; + +int __must_check of_icc_bulk_get(struct device *dev, int num_paths, + struct icc_bulk_data *paths); +void icc_bulk_put(int num_paths, struct icc_bulk_data *paths); +int icc_bulk_set_bw(int num_paths, const struct icc_bulk_data *paths); +int icc_bulk_enable(int num_paths, const struct icc_bulk_data *paths); +void icc_bulk_disable(int num_paths, const struct icc_bulk_data *paths); + #if IS_ENABLED(CONFIG_INTERCONNECT) struct icc_path *icc_get(struct device *dev, const int src_id, -- cgit v1.2.3 From 1521e22bfa12db6225002ba3b040572a78dff996 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Thu, 3 Sep 2020 16:31:28 +0300 Subject: interconnect: Introduce xlate_extended() callback Currently there is the xlate() callback, which is used by providers for mapping the nodes from phandle arguments. That's fine for simple mappings, but the phandle arguments could contain an additional data, such as tag information. Let's create another callback xlate_extended() for the cases where providers want also populate the path tag data. Tested-by: Sibi Sankar Reviewed-by: Sibi Sankar Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20200903133134.17201-2-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- include/linux/interconnect-provider.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 4735518de515..4d535fddd5d3 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -14,6 +14,17 @@ struct icc_node; struct of_phandle_args; +/** + * struct icc_node_data - icc node data + * + * @node: icc node + * @tag: tag + */ +struct icc_node_data { + struct icc_node *node; + u32 tag; +}; + /** * struct icc_onecell_data - driver data for onecell interconnect providers * @@ -39,6 +50,7 @@ struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, * @pre_aggregate: pointer to device specific function that is called * before the aggregation begins (optional) * @xlate: provider-specific callback for mapping nodes from phandle arguments + * @xlate_extended: vendor-specific callback for mapping node data from phandle arguments * @dev: the device this interconnect provider belongs to * @users: count of active users * @inter_set: whether inter-provider pairs will be configured with @set @@ -52,6 +64,7 @@ struct icc_provider { u32 peak_bw, u32 *agg_avg, u32 *agg_peak); void (*pre_aggregate)(struct icc_node *node); struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); + struct icc_node_data* (*xlate_extended)(struct of_phandle_args *spec, void *data); struct device *dev; int users; bool inter_set; @@ -105,7 +118,7 @@ void icc_node_del(struct icc_node *node); int icc_nodes_remove(struct icc_provider *provider); int icc_provider_add(struct icc_provider *provider); int icc_provider_del(struct icc_provider *provider); -struct icc_node *of_icc_get_from_provider(struct of_phandle_args *spec); +struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec); #else @@ -157,7 +170,7 @@ static inline int icc_provider_del(struct icc_provider *provider) return -ENOTSUPP; } -static inline struct icc_node *of_icc_get_from_provider(struct of_phandle_args *spec) +static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec) { return ERR_PTR(-ENOTSUPP); } -- cgit v1.2.3 From a8803055127afb87640974adedac60435592b86d Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 7 Sep 2020 18:46:55 +0100 Subject: firmware: arm_scmi: Add system power protocol support Add bare protocol support for SCMI system power protocol as needed by an OSPM agent: basic initialization and SYSTEM_POWER_STATE_NOTIFIER core notification support. No event-handling logic is attached to such notification.. Link: https://lore.kernel.org/r/20200907174657.32466-2-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 05570afc7f74..4b10093ad671 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -292,6 +292,7 @@ struct scmi_handle { void *sensor_priv; void *reset_priv; void *notify_priv; + void *system_priv; }; enum scmi_std_protocol { @@ -304,6 +305,15 @@ enum scmi_std_protocol { SCMI_PROTOCOL_RESET = 0x16, }; +enum scmi_system_events { + SCMI_SYSTEM_SHUTDOWN, + SCMI_SYSTEM_COLDRESET, + SCMI_SYSTEM_WARMRESET, + SCMI_SYSTEM_POWERUP, + SCMI_SYSTEM_SUSPEND, + SCMI_SYSTEM_MAX +}; + struct scmi_device { u32 id; u8 protocol_id; @@ -378,6 +388,7 @@ enum scmi_notification_events { SCMI_EVENT_SENSOR_TRIP_POINT_EVENT = 0x0, SCMI_EVENT_RESET_ISSUED = 0x0, SCMI_EVENT_BASE_ERROR_EVENT = 0x0, + SCMI_EVENT_SYSTEM_POWER_STATE_NOTIFIER = 0x0, }; struct scmi_power_state_changed_report { @@ -387,6 +398,13 @@ struct scmi_power_state_changed_report { unsigned int power_state; }; +struct scmi_system_power_state_notifier_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int flags; + unsigned int system_state; +}; + struct scmi_perf_limits_report { ktime_t timestamp; unsigned int agent_id; -- cgit v1.2.3 From 4bd6a7353ee14697fea645e941354976d2c4a452 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 3 Sep 2020 16:22:32 +0200 Subject: sysctl: Convert to iter interfaces Using the read_iter/write_iter interfaces allows for in-kernel users to set sysctls without using set_fs(). Also, the buffer is a string, so give it the real type of 'char *', not void *. [AV: Christoph's fixup folded in] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/bpf-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 64f367044e25..82b26a1386d8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -136,7 +136,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - void **buf, size_t *pcount, loff_t *ppos, + char **buf, size_t *pcount, loff_t *ppos, enum bpf_attach_type type); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, -- cgit v1.2.3 From 36e2c7421f02a22f71c9283e55fdb672a9eb58e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:34 +0200 Subject: fs: don't allow splice read/write without explicit ops default_file_splice_write is the last piece of generic code that uses set_fs to make the uaccess routines operate on kernel pointers. It implements a "fallback loop" for splicing from files that do not actually provide a proper splice_read method. The usual file systems and other high bandwidth instances all provide a ->splice_read, so this just removes support for various device drivers and procfs/debugfs files. If splice support for any of those turns out to be important it can be added back by switching them to the iter ops and using generic_file_splice_read. Signed-off-by: Christoph Hellwig Reviewed-by: Kees Cook Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e019ea2f1347..d33cc3e8ed41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1894,8 +1894,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); -extern ssize_t vfs_readv(struct file *, const struct iovec __user *, - unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3 From 5e6e9852d6f76e01b2e6803c74258afa5b432bc5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:35 +0200 Subject: uaccess: add infrastructure for kernel builds with set_fs() Add a CONFIG_SET_FS option that is selected by architecturess that implement set_fs, which is all of them initially. If the option is not set stubs for routines related to overriding the address space are provided so that architectures can start to opt out of providing set_fs. Signed-off-by: Christoph Hellwig Reviewed-by: Kees Cook Signed-off-by: Al Viro --- include/linux/uaccess.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 94b285411659..70073c802b48 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -8,6 +8,7 @@ #include +#ifdef CONFIG_SET_FS /* * Force the uaccess routines to be wired up for actual userspace access, * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone @@ -25,6 +26,23 @@ static inline void force_uaccess_end(mm_segment_t oldfs) { set_fs(oldfs); } +#else /* CONFIG_SET_FS */ +typedef struct { + /* empty dummy */ +} mm_segment_t; + +#define uaccess_kernel() (false) +#define user_addr_max() (TASK_SIZE_MAX) + +static inline mm_segment_t force_uaccess_begin(void) +{ + return (mm_segment_t) { }; +} + +static inline void force_uaccess_end(mm_segment_t oldfs) +{ +} +#endif /* CONFIG_SET_FS */ /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) -- cgit v1.2.3 From 7c69898b86b45842e1c2799df845e203c71a667e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Sep 2020 09:25:33 +0200 Subject: Revert "test_firmware: Test platform fw loading on non-EFI systems" This reverts commit 18efb2f9e897ac65e7a1b2892f4a53e404534eba as it is reported to break the build: https://lore.kernel.org/r/20200909154709.619fe9bb@canb.auug.org.au Reported-by: Stephen Rothwell Fixes: 18efb2f9e897 ("test_firmware: Test platform fw loading on non-EFI systems") Cc: stable@vger.kernel.org Cc: Luis Chamberlain Cc: Scott Branden Cc: Kees Cook Link: https://lore.kernel.org/r/20200909154709.619fe9bb@canb.auug.org.au Signed-off-by: Greg Kroah-Hartman --- include/linux/efi_embedded_fw.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h index 4ad5db9f5312..57eac5241303 100644 --- a/include/linux/efi_embedded_fw.h +++ b/include/linux/efi_embedded_fw.h @@ -7,6 +7,19 @@ #define EFI_EMBEDDED_FW_PREFIX_LEN 8 +/* + * This struct and efi_embedded_fw_list are private to the efi-embedded fw + * implementation they are in this header for use by lib/test_firmware.c only! + */ +struct efi_embedded_fw { + struct list_head list; + const char *name; + const u8 *data; + size_t length; +}; + +extern struct list_head efi_embedded_fw_list; + /** * struct efi_embedded_fw_desc - This struct is used by the EFI embedded-fw * code to search for embedded firmwares. -- cgit v1.2.3 From f601e8f37c2c1c52f2923fffc48204a7f7dc023d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Sep 2020 09:37:40 +0200 Subject: Revert "driver core: Annotate dev_err_probe() with __must_check" This reverts commit e1f82a0dcf388d98bcc7ad195c03bd812405e6b2 as it's already starting to cause build warnings in linux-next for things that are "obviously correct". It's up to driver authors do "do the right thing" here with this function, and if they don't want to call it as the last line of a function, that's up to them, otherwise code that looks like: ret = dev_err_probe(..., ret, ...); does look really "odd". Reported-by: Stephen Rothwell Reported-by: Krzysztof Kozlowski Fixes: e1f82a0dcf38 ("driver core: Annotate dev_err_probe() with __must_check") Cc: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 85d5c28bed93..7a0938cf8bd7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -924,7 +924,7 @@ void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); extern __printf(3, 4) -int __must_check dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ -- cgit v1.2.3 From 2a71593da34d473461f2f5c3dbb53b883596188a Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 3 Aug 2020 07:17:55 +0200 Subject: i2c: smbus: add core function handling SMBus host-notify SMBus Host-Notify protocol, from the adapter point of view consist of receiving a message from a client, including the client address and some other data. It can be simply handled by creating a new slave device and registering a callback performing the parsing of the message received from the client. This commit introduces two new core functions * i2c_new_slave_host_notify_device * i2c_free_slave_host_notify_device that take care of registration of the new slave device and callback and will call i2c_handle_smbus_host_notify once a Host-Notify event is received. Signed-off-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- include/linux/i2c-smbus.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 1e4e0de4ef8b..1ef421818d3a 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -38,6 +38,18 @@ static inline int of_i2c_setup_smbus_alert(struct i2c_adapter *adap) return 0; } #endif +#if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_I2C_SLAVE) +struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter); +void i2c_free_slave_host_notify_device(struct i2c_client *client); +#else +static inline struct i2c_client *i2c_new_slave_host_notify_device(struct i2c_adapter *adapter) +{ + return ERR_PTR(-ENOSYS); +} +static inline void i2c_free_slave_host_notify_device(struct i2c_client *client) +{ +} +#endif #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI) void i2c_register_spd(struct i2c_adapter *adap); -- cgit v1.2.3 From d284c16f84c9f8facdde25c9c077cfdcb66163d5 Mon Sep 17 00:00:00 2001 From: dillon min Date: Thu, 3 Sep 2020 15:30:22 +0800 Subject: gpio: tc35894: Disable Direct KBD interrupts to enable gpio irq On tc35894, have to disable direct keypad interrupts to make it as general purpose interrupts functionality work. if not, after chip reset, IRQST(0x91) will always 0x20, IRQN always low level, can't be clear. Configure DIRECTx to enable general purpose gpio mode, else read GPIOMISx register always zero in irq routine. verified on tc35894, need more test on other tc3589x. Signed-off-by: dillon min Acked-by: Lee Jones Signed-off-by: Bartosz Golaszewski --- include/linux/mfd/tc3589x.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tc3589x.h b/include/linux/mfd/tc3589x.h index bb2b19599761..b84955410e03 100644 --- a/include/linux/mfd/tc3589x.h +++ b/include/linux/mfd/tc3589x.h @@ -19,6 +19,9 @@ enum tx3589x_block { #define TC3589x_RSTCTRL_KBDRST (1 << 1) #define TC3589x_RSTCTRL_GPIRST (1 << 0) +#define TC3589x_DKBDMSK_ELINT (1 << 1) +#define TC3589x_DKBDMSK_EINT (1 << 0) + /* Keyboard Configuration Registers */ #define TC3589x_KBDSETTLE_REG 0x01 #define TC3589x_KBDBOUNCE 0x02 @@ -101,6 +104,9 @@ enum tx3589x_block { #define TC3589x_GPIOODM2 0xE4 #define TC3589x_GPIOODE2 0xE5 +#define TC3589x_DIRECT0 0xEC +#define TC3589x_DKBDMSK 0xF3 + #define TC3589x_INT_GPIIRQ 0 #define TC3589x_INT_TI0IRQ 1 #define TC3589x_INT_TI1IRQ 2 -- cgit v1.2.3 From 2acd30b9f6032c6cbefc5e255c17ebbb0718e56a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 8 Sep 2020 21:45:15 +0800 Subject: ASoC/soundwire: bus: use property to set interrupt masks Add a slave-level property and program the SCP_INT1_MASK as desired by the codec driver. Since there is no DisCo property this has to be an implementation-specific firmware property or hard-coded in the driver. The only functionality change is that implementation-defined interrupts are no longer set for amplifiers - those interrupts are typically for jack detection or acoustic event detection/hotwording. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Tested-by: Srinivas Kandagatla Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Acked-by: Mark Brown Link: https://lore.kernel.org/r/20200908134521.6781-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 1e9010c139f0..9d94cdf6346f 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -357,6 +357,7 @@ struct sdw_dpn_prop { * @dp0_prop: Data Port 0 properties * @src_dpn_prop: Source Data Port N properties * @sink_dpn_prop: Sink Data Port N properties + * @scp_int1_mask: SCP_INT1_MASK desired settings */ struct sdw_slave_prop { u32 mipi_revision; @@ -378,6 +379,7 @@ struct sdw_slave_prop { struct sdw_dp0_prop *dp0_prop; struct sdw_dpn_prop *src_dpn_prop; struct sdw_dpn_prop *sink_dpn_prop; + u8 scp_int1_mask; }; /** -- cgit v1.2.3 From c2819e196b3cc1901a4612f72e66da4821966a5e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 8 Sep 2020 21:45:17 +0800 Subject: soundwire: slave: add first_interrupt_done status Some Slaves report incorrect information in their interrupt status registers after a master/bus reset, track the initial interrupt handling so that quirks can be introduced to filter out incorrect information while keeping interrupts enabled in steady state. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20200908134521.6781-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9d94cdf6346f..2b93a8ef7fad 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -616,6 +616,8 @@ struct sdw_slave_ops { * between the Master suspending and the codec resuming, and make sure that * when the Master triggered a reset the Slave is properly enumerated and * initialized + * @first_interrupt_done: status flag tracking if the interrupt handling + * for a Slave happens for the first time after enumeration */ struct sdw_slave { struct sdw_slave_id id; @@ -637,6 +639,7 @@ struct sdw_slave { struct completion enumeration_complete; struct completion initialization_complete; u32 unattach_request; + bool first_interrupt_done; }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) -- cgit v1.2.3 From 4724f12c1315efa79a0cbf74dfb0c9b98b1a4bff Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 8 Sep 2020 21:45:18 +0800 Subject: soundwire: bus: use quirk to filter out invalid parity errors If a Slave device reports with a quirk that its initial parity check may be incorrect, filter it but keep the parity checks active in steady state. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20200908134521.6781-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 2b93a8ef7fad..790823d2d33b 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -358,6 +358,7 @@ struct sdw_dpn_prop { * @src_dpn_prop: Source Data Port N properties * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings + * @quirks: bitmask identifying deltas from the MIPI specification */ struct sdw_slave_prop { u32 mipi_revision; @@ -380,8 +381,11 @@ struct sdw_slave_prop { struct sdw_dpn_prop *src_dpn_prop; struct sdw_dpn_prop *sink_dpn_prop; u8 scp_int1_mask; + u32 quirks; }; +#define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) + /** * struct sdw_master_prop - Master properties * @revision: MIPI spec version of the implementation -- cgit v1.2.3 From 5bfe37ca8ac8e9176bfd923d0a83802b7305d2f5 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Wed, 19 Aug 2020 18:19:41 -0400 Subject: virtio: Add get_shm_region method Virtio defines 'shared memory regions' that provide a continuously shared region between the host and guest. Provide a method to find a particular region on a device. Signed-off-by: Sebastien Boeuf Signed-off-by: Dr. David Alan Gilbert Acked-by: Michael S. Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: "Michael S. Tsirkin" Signed-off-by: Miklos Szeredi --- include/linux/virtio_config.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8fe857e27ef3..4b8e38c5c4d8 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -11,6 +11,11 @@ struct irq_affinity; +struct virtio_shm_region { + u64 addr; + u64 len; +}; + /** * virtio_config_ops - operations for configuring a virtio device * Note: Do not assume that a transport implements all of the operations @@ -66,6 +71,7 @@ struct irq_affinity; * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue (optional). * @get_vq_affinity: get the affinity for a virtqueue (optional). + * @get_shm_region: get a shared memory region based on the index. */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -89,6 +95,8 @@ struct virtio_config_ops { const struct cpumask *cpu_mask); const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, int index); + bool (*get_shm_region)(struct virtio_device *vdev, + struct virtio_shm_region *region, u8 id); }; /* If driver didn't advertise the feature, it will never appear. */ @@ -251,6 +259,15 @@ int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) return 0; } +static inline +bool virtio_get_shm_region(struct virtio_device *vdev, + struct virtio_shm_region *region, u8 id) +{ + if (!vdev->config->get_shm_region) + return false; + return vdev->config->get_shm_region(vdev, region, id); +} + static inline bool virtio_is_little_endian(struct virtio_device *vdev) { return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || -- cgit v1.2.3 From 80793c3471d90d4dc2b48deadb6413bdfe39500f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 27 Aug 2020 13:40:39 +0200 Subject: seqlock: Introduce seqcount_latch_t Latch sequence counters are a multiversion concurrency control mechanism where the seqcount_t counter even/odd value is used to switch between two copies of protected data. This allows the seqcount_t read path to safely interrupt its write side critical section (e.g. from NMIs). Initially, latch sequence counters were implemented as a single write function above plain seqcount_t: raw_write_seqcount_latch(). The read side was expected to use plain seqcount_t raw_read_seqcount(). A specialized latch read function, raw_read_seqcount_latch(), was later added. It became the standardized way for latch read paths. Due to the dependent load, it has one read memory barrier less than the plain seqcount_t raw_read_seqcount() API. Only raw_write_seqcount_latch() and raw_read_seqcount_latch() should be used with latch sequence counters. Having *unique* read and write path APIs means that latch sequence counters are actually a data type of their own -- just inappropriately overloading plain seqcount_t. Introduce seqcount_latch_t. This adds type-safety and ensures that only the correct latch-safe APIs are to be used. Not to break bisection, let the latch APIs also accept plain seqcount_t or seqcount_raw_spinlock_t. After converting all call sites to seqcount_latch_t, only that new data type will be allowed. References: 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()") References: 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()") References: aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200827114044.11173-4-a.darwish@linutronix.de --- include/linux/seqlock.h | 104 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 300cbf312546..88b917d4ebde 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -587,34 +587,76 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s) kcsan_nestable_atomic_end(); } -/** - * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants +/* + * Latch sequence counters (seqcount_latch_t) * - * Use seqcount_t latching to switch between two storage places protected - * by a sequence counter. Doing so allows having interruptible, preemptible, - * seqcount_t write side critical sections. + * A sequence counter variant where the counter even/odd value is used to + * switch between two copies of protected data. This allows the read path, + * typically NMIs, to safely interrupt the write side critical section. * - * Check raw_write_seqcount_latch() for more details and a full reader and - * writer usage example. + * As the write sections are fully preemptible, no special handling for + * PREEMPT_RT is needed. + */ +typedef struct { + seqcount_t seqcount; +} seqcount_latch_t; + +/** + * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t + * @seq_name: Name of the seqcount_latch_t instance + */ +#define SEQCNT_LATCH_ZERO(seq_name) { \ + .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ +} + +/** + * seqcount_latch_init() - runtime initializer for seqcount_latch_t + * @s: Pointer to the seqcount_latch_t instance + */ +static inline void seqcount_latch_init(seqcount_latch_t *s) +{ + seqcount_init(&s->seqcount); +} + +/** + * raw_read_seqcount_latch() - pick even/odd latch data copy + * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t + * + * See raw_write_seqcount_latch() for details and a full reader/writer + * usage example. * * Return: sequence counter raw value. Use the lowest bit as an index for - * picking which data copy to read. The full counter value must then be - * checked with read_seqcount_retry(). + * picking which data copy to read. The full counter must then be checked + * with read_seqcount_latch_retry(). */ -#define raw_read_seqcount_latch(s) \ - raw_read_seqcount_t_latch(__seqcount_ptr(s)) +#define raw_read_seqcount_latch(s) \ +({ \ + /* \ + * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). \ + * Due to the dependent load, a full smp_rmb() is not needed. \ + */ \ + _Generic(*(s), \ + seqcount_t: READ_ONCE(((seqcount_t *)s)->sequence), \ + seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence), \ + seqcount_latch_t: READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence)); \ +}) -static inline int raw_read_seqcount_t_latch(seqcount_t *s) +/** + * read_seqcount_latch_retry() - end a seqcount_latch_t read section + * @s: Pointer to seqcount_latch_t + * @start: count, from raw_read_seqcount_latch() + * + * Return: true if a read section retry is required, else false + */ +static inline int +read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) { - /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ - int seq = READ_ONCE(s->sequence); /* ^^^ */ - return seq; + return read_seqcount_retry(&s->seqcount, start); } /** - * raw_write_seqcount_latch() - redirect readers to even/odd copy - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * raw_write_seqcount_latch() - redirect latch readers to even/odd copy + * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -633,7 +675,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * The basic form is a data structure like:: * * struct latch_struct { - * seqcount_t seq; + * seqcount_latch_t seq; * struct data_struct data[2]; * }; * @@ -643,13 +685,13 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * void latch_modify(struct latch_struct *latch, ...) * { * smp_wmb(); // Ensure that the last data[1] update is visible - * latch->seq++; + * latch->seq.sequence++; * smp_wmb(); // Ensure that the seqcount update is visible * * modify(latch->data[0], ...); * * smp_wmb(); // Ensure that the data[0] update is visible - * latch->seq++; + * latch->seq.sequence++; * smp_wmb(); // Ensure that the seqcount update is visible * * modify(latch->data[1], ...); @@ -668,8 +710,8 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * - * // read_seqcount_retry() includes needed smp_rmb() - * } while (read_seqcount_retry(&latch->seq, seq)); + * // This includes needed smp_rmb() + * } while (read_seqcount_latch_retry(&latch->seq, seq)); * * return entry; * } @@ -693,14 +735,14 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. */ -#define raw_write_seqcount_latch(s) \ - raw_write_seqcount_t_latch(__seqcount_ptr(s)) - -static inline void raw_write_seqcount_t_latch(seqcount_t *s) -{ - smp_wmb(); /* prior stores before incrementing "sequence" */ - s->sequence++; - smp_wmb(); /* increment "sequence" before following stores */ +#define raw_write_seqcount_latch(s) \ +{ \ + smp_wmb(); /* prior stores before incrementing "sequence" */ \ + _Generic(*(s), \ + seqcount_t: ((seqcount_t *)s)->sequence++, \ + seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \ + seqcount_latch_t: ((seqcount_latch_t *)s)->seqcount.sequence++); \ + smp_wmb(); /* increment "sequence" before following stores */ \ } /* -- cgit v1.2.3 From 24bf401cebfd630cc9e2c3746e43945e836626f9 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 27 Aug 2020 13:40:43 +0200 Subject: rbtree_latch: Use seqcount_latch_t Latch sequence counters have unique read and write APIs, and thus seqcount_latch_t was recently introduced at seqlock.h. Use that new data type instead of plain seqcount_t. This adds the necessary type-safety and ensures that only latching-safe seqcount APIs are to be used. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200827114044.11173-8-a.darwish@linutronix.de --- include/linux/rbtree_latch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h index 7d012faa509a..3d1a9e716b80 100644 --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h @@ -42,8 +42,8 @@ struct latch_tree_node { }; struct latch_tree_root { - seqcount_t seq; - struct rb_root tree[2]; + seqcount_latch_t seq; + struct rb_root tree[2]; }; /** @@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root, do { seq = raw_read_seqcount_latch(&root->seq); node = __lt_find(key, root, seq & 1, ops->comp); - } while (read_seqcount_retry(&root->seq, seq)); + } while (read_seqcount_latch_retry(&root->seq, seq)); return node; } -- cgit v1.2.3 From 0c9794c8b6781eb7dad8e19b78c5d4557790597a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 27 Aug 2020 13:40:44 +0200 Subject: seqlock: seqcount latch APIs: Only allow seqcount_latch_t All latch sequence counter call-sites have now been converted from plain seqcount_t to the new seqcount_latch_t data type. Enforce type-safety by modifying seqlock.h latch APIs to only accept seqcount_latch_t. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200827114044.11173-9-a.darwish@linutronix.de --- include/linux/seqlock.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 88b917d4ebde..f2a7a467e998 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -620,7 +620,7 @@ static inline void seqcount_latch_init(seqcount_latch_t *s) /** * raw_read_seqcount_latch() - pick even/odd latch data copy - * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t + * @s: Pointer to seqcount_latch_t * * See raw_write_seqcount_latch() for details and a full reader/writer * usage example. @@ -629,17 +629,14 @@ static inline void seqcount_latch_init(seqcount_latch_t *s) * picking which data copy to read. The full counter must then be checked * with read_seqcount_latch_retry(). */ -#define raw_read_seqcount_latch(s) \ -({ \ - /* \ - * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). \ - * Due to the dependent load, a full smp_rmb() is not needed. \ - */ \ - _Generic(*(s), \ - seqcount_t: READ_ONCE(((seqcount_t *)s)->sequence), \ - seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence), \ - seqcount_latch_t: READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence)); \ -}) +static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s) +{ + /* + * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). + * Due to the dependent load, a full smp_rmb() is not needed. + */ + return READ_ONCE(s->seqcount.sequence); +} /** * read_seqcount_latch_retry() - end a seqcount_latch_t read section @@ -656,7 +653,7 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) /** * raw_write_seqcount_latch() - redirect latch readers to even/odd copy - * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t + * @s: Pointer to seqcount_latch_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -735,14 +732,11 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. */ -#define raw_write_seqcount_latch(s) \ -{ \ - smp_wmb(); /* prior stores before incrementing "sequence" */ \ - _Generic(*(s), \ - seqcount_t: ((seqcount_t *)s)->sequence++, \ - seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \ - seqcount_latch_t: ((seqcount_latch_t *)s)->seqcount.sequence++); \ - smp_wmb(); /* increment "sequence" before following stores */ \ +static inline void raw_write_seqcount_latch(seqcount_latch_t *s) +{ + smp_wmb(); /* prior stores before incrementing "sequence" */ + s->seqcount.sequence++; + smp_wmb(); /* increment "sequence" before following stores */ } /* -- cgit v1.2.3 From 6dd699b13d53f26a7603702d8bada3482312df74 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 4 Sep 2020 17:32:27 +0200 Subject: seqlock: seqcount_LOCKNAME_t: Standardize naming convention At seqlock.h, sequence counters with associated locks are either called seqcount_LOCKNAME_t, seqcount_LOCKTYPE_t, or seqcount_locktype_t. Standardize on seqcount_LOCKNAME_t for all instances in comments, kernel-doc, and SEQCOUNT_LOCKNAME() generative macro paramters. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200904153231.11994-2-a.darwish@linutronix.de --- include/linux/seqlock.h | 79 +++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f2a7a467e998..820ace2f5911 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -53,7 +53,7 @@ * * If the write serialization mechanism is one of the common kernel * locking primitives, use a sequence counter with associated lock - * (seqcount_LOCKTYPE_t) instead. + * (seqcount_LOCKNAME_t) instead. * * If it's desired to automatically handle the sequence counter writer * serialization and non-preemptibility requirements, use a sequential @@ -117,7 +117,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /* - * Sequence counters with associated locks (seqcount_LOCKTYPE_t) + * Sequence counters with associated locks (seqcount_LOCKNAME_t) * * A sequence counter which associates the lock used for writer * serialization at initialization time. This enables lockdep to validate @@ -138,30 +138,32 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #endif /** - * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPE associated + * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated * @seqcount: The real sequence counter - * @lock: Pointer to the associated spinlock + * @lock: Pointer to the associated lock * - * A plain sequence counter with external writer synchronization by a - * spinlock. The spinlock is associated to the sequence count in the + * A plain sequence counter with external writer synchronization by + * LOCKNAME @lock. The lock is associated to the sequence counter in the * static initializer or init function. This enables lockdep to validate * that the write side critical section is properly serialized. + * + * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex. */ /* * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t * @s: Pointer to the seqcount_LOCKNAME_t instance - * @lock: Pointer to the associated LOCKTYPE + * @lock: Pointer to the associated lock */ /* - * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers - * @locktype: actual typename - * @lockname: name - * @preemptible: preemptibility of above locktype + * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers + * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t + * @locktype: LOCKNAME canonical C data type + * @preemptible: preemptibility of above lockname * @lockmember: argument for lockdep_assert_held() */ -#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember) \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember) \ typedef struct seqcount_##lockname { \ seqcount_t seqcount; \ __SEQ_LOCK(locktype *lock); \ @@ -211,29 +213,28 @@ static inline void __seqcount_assert(seqcount_t *s) lockdep_assert_preemption_disabled(); } -SEQCOUNT_LOCKTYPE(raw_spinlock_t, raw_spinlock, false, s->lock) -SEQCOUNT_LOCKTYPE(spinlock_t, spinlock, false, s->lock) -SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) -SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) -SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, false, s->lock) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, false, s->lock) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock) +SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t * @name: Name of the seqcount_LOCKNAME_t instance - * @lock: Pointer to the associated LOCKTYPE + * @lock: Pointer to the associated LOCKNAME */ -#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ +#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ __SEQ_LOCK(.lock = (assoc_lock)) \ } -#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) -#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) -#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) -#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) -#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) - +#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) +#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) +#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) +#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) +#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) @@ -252,7 +253,7 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -283,7 +284,7 @@ repeat: /** * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ @@ -299,7 +300,7 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) /** * read_seqcount_begin() - begin a seqcount_t read critical section - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ @@ -314,7 +315,7 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s) /** * raw_read_seqcount() - read the raw seqcount_t counter value - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_read_seqcount opens a read critical section of the given * seqcount_t, without any lockdep checking, and without checking or @@ -337,7 +338,7 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s) /** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_seqcount_begin opens a read critical section of the given * seqcount_t. Unlike read_seqcount_begin(), this function will not wait @@ -365,7 +366,7 @@ static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) /** * __read_seqcount_retry() - end a seqcount_t read section w/o barrier - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() @@ -389,7 +390,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) /** * read_seqcount_retry() - end a seqcount_t read critical section - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * read_seqcount_retry closes the read critical section of given @@ -409,7 +410,7 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants */ #define raw_write_seqcount_begin(s) \ do { \ @@ -428,7 +429,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants */ #define raw_write_seqcount_end(s) \ do { \ @@ -448,7 +449,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s) /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst @@ -471,7 +472,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) /** * write_seqcount_begin() - start a seqcount_t write side critical section - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * write_seqcount_begin opens a write side critical section of the given * seqcount_t. @@ -497,7 +498,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s) /** * write_seqcount_end() - end a seqcount_t write side critical section - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * The write section must've been opened with write_seqcount_begin(). */ @@ -517,7 +518,7 @@ static inline void write_seqcount_t_end(seqcount_t *s) /** * raw_write_seqcount_barrier() - do a seqcount_t write barrier - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * This can be used to provide an ordering guarantee instead of the usual * consistency guarantee. It is one wmb cheaper, because it can collapse @@ -571,7 +572,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) /** * write_seqcount_invalidate() - invalidate in-progress seqcount_t read * side operations - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * After write_seqcount_invalidate, no seqcount_t read side operations * will complete successfully and see data older than this. -- cgit v1.2.3 From 5cdd25572a29e46f932d3e6eedbd07429de66431 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 4 Sep 2020 17:32:28 +0200 Subject: seqlock: Use unique prefix for seqcount_t property accessors At seqlock.h, the following set of functions: - __seqcount_ptr() - __seqcount_preemptible() - __seqcount_assert() act as plain seqcount_t "property" accessors. Meanwhile, the following group: - __seqcount_ptr() - __seqcount_lock_preemptible() - __seqcount_assert_lock_held() act as the equivalent set, but in the generic form, taking either seqcount_t or any of the seqcount_LOCKNAME_t variants. This is quite confusing, especially the first member where it is called exactly the same in both groups. Differentiate the first group by using "__seqprop" as prefix, and also use that same prefix for all of seqcount_LOCKNAME_t property accessors. While at it, constify the property accessors first parameter when appropriate. References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200904153231.11994-3-a.darwish@linutronix.de --- include/linux/seqlock.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 820ace2f5911..0b4a22f33ac3 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -157,7 +157,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) */ /* - * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers + * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers + * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t + * * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above lockname @@ -177,19 +179,19 @@ seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock) \ } \ \ static __always_inline seqcount_t * \ -__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ static __always_inline bool \ -__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ { \ return preemptible; \ } \ \ static __always_inline void \ -__seqcount_##lockname##_assert(seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ __SEQ_LOCK(lockdep_assert_held(lockmember)); \ } @@ -198,17 +200,17 @@ __seqcount_##lockname##_assert(seqcount_##lockname##_t *s) \ * __seqprop() for seqcount_t */ -static inline seqcount_t *__seqcount_ptr(seqcount_t *s) +static inline seqcount_t *__seqprop_ptr(seqcount_t *s) { return s; } -static inline bool __seqcount_preemptible(seqcount_t *s) +static inline bool __seqprop_preemptible(const seqcount_t *s) { return false; } -static inline void __seqcount_assert(seqcount_t *s) +static inline void __seqprop_assert(const seqcount_t *s) { lockdep_assert_preemption_disabled(); } @@ -237,10 +239,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) + seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) #define __seqprop(s, prop) _Generic(*(s), \ - seqcount_t: __seqcount_##prop((void *)(s)), \ + seqcount_t: __seqprop_##prop((void *)(s)), \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ -- cgit v1.2.3 From 52ac39e5db5148f70392edb654ad882ac8da88a8 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 4 Sep 2020 17:32:29 +0200 Subject: seqlock: seqcount_t: Implement all read APIs as statement expressions The sequence counters read APIs are implemented as CPP macros, so they can take either seqcount_t or any of the seqcount_LOCKNAME_t variants. Such macros then get *directly* transformed to internal C functions that only take plain seqcount_t. Further commits need access to seqcount_LOCKNAME_t inside of the actual read APIs code. Thus transform all of the seqcount read APIs to pure GCC statement expressions instead. This will not break type-safety: all of the transformed APIs resolve to a _Generic() selection that does not have a "default" case. This will also not affect the transformed APIs readability: previously added kernel-doc above all of seqlock.h functions makes the expectations quite clear for call-site developers. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200904153231.11994-4-a.darwish@linutronix.de --- include/linux/seqlock.h | 94 +++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 0b4a22f33ac3..f3b78277e26b 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -184,6 +184,12 @@ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ return &s->seqcount; \ } \ \ +static __always_inline unsigned \ +__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ +{ \ + return READ_ONCE(s->seqcount.sequence); \ +} \ + \ static __always_inline bool \ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ { \ @@ -205,6 +211,11 @@ static inline seqcount_t *__seqprop_ptr(seqcount_t *s) return s; } +static inline unsigned __seqprop_sequence(const seqcount_t *s) +{ + return READ_ONCE(s->sequence); +} + static inline bool __seqprop_preemptible(const seqcount_t *s) { return false; @@ -250,6 +261,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base) __seqprop_case((s), ww_mutex, prop)) #define __seqcount_ptr(s) __seqprop(s, ptr) +#define __seqcount_sequence(s) __seqprop(s, sequence) #define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) #define __seqcount_assert_lock_held(s) __seqprop(s, assert) @@ -268,21 +280,15 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base) * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ - __read_seqcount_t_begin(__seqcount_ptr(s)) - -static inline unsigned __read_seqcount_t_begin(const seqcount_t *s) -{ - unsigned ret; - -repeat: - ret = READ_ONCE(s->sequence); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret; -} +({ \ + unsigned seq; \ + \ + while ((seq = __seqcount_sequence(s)) & 1) \ + cpu_relax(); \ + \ + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ + seq; \ +}) /** * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep @@ -291,14 +297,12 @@ repeat: * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount_begin(s) \ - raw_read_seqcount_t_begin(__seqcount_ptr(s)) - -static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) -{ - unsigned ret = __read_seqcount_t_begin(s); - smp_rmb(); - return ret; -} +({ \ + unsigned seq = __read_seqcount_begin(s); \ + \ + smp_rmb(); \ + seq; \ +}) /** * read_seqcount_begin() - begin a seqcount_t read critical section @@ -307,13 +311,10 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define read_seqcount_begin(s) \ - read_seqcount_t_begin(__seqcount_ptr(s)) - -static inline unsigned read_seqcount_t_begin(const seqcount_t *s) -{ - seqcount_lockdep_reader_access(s); - return raw_read_seqcount_t_begin(s); -} +({ \ + seqcount_lockdep_reader_access(__seqcount_ptr(s)); \ + raw_read_seqcount_begin(s); \ +}) /** * raw_read_seqcount() - read the raw seqcount_t counter value @@ -327,15 +328,13 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount(s) \ - raw_read_seqcount_t(__seqcount_ptr(s)) - -static inline unsigned raw_read_seqcount_t(const seqcount_t *s) -{ - unsigned ret = READ_ONCE(s->sequence); - smp_rmb(); - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret; -} +({ \ + unsigned seq = __seqcount_sequence(s); \ + \ + smp_rmb(); \ + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ + seq; \ +}) /** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o @@ -355,16 +354,13 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define raw_seqcount_begin(s) \ - raw_seqcount_t_begin(__seqcount_ptr(s)) - -static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) -{ - /* - * If the counter is odd, let read_seqcount_retry() fail - * by decrementing the counter. - */ - return raw_read_seqcount_t(s) & ~1; -} +({ \ + /* \ + * If the counter is odd, let read_seqcount_retry() fail \ + * by decrementing the counter. \ + */ \ + raw_read_seqcount(s) & ~1; \ +}) /** * __read_seqcount_retry() - end a seqcount_t read section w/o barrier -- cgit v1.2.3 From 8117ab508f9c476e0a10b9db7f4818f784cf3176 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 4 Sep 2020 17:32:30 +0200 Subject: seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support Preemption must be disabled before entering a sequence counter write side critical section. Otherwise the read side section can preempt the write side section and spin for the entire scheduler tick. If that reader belongs to a real-time scheduling class, it can spin forever and the kernel will livelock. Disabling preemption cannot be done for PREEMPT_RT though: it can lead to higher latencies, and the write side sections will not be able to acquire locks which become sleeping locks (e.g. spinlock_t). To remain preemptible, while avoiding a possible livelock caused by the reader preempting the writer, use a different technique: let the reader detect if a seqcount_LOCKNAME_t writer is in progress. If that's the case, acquire then release the associated LOCKNAME writer serialization lock. This will allow any possibly-preempted writer to make progress until the end of its writer serialization lock critical section. Implement this lock-unlock technique for all seqcount_LOCKNAME_t with an associated (PREEMPT_RT) sleeping lock. References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200519214547.352050-1-a.darwish@linutronix.de --- include/linux/seqlock.h | 61 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f3b78277e26b..2bc95104ab1b 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -131,7 +132,23 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * See Documentation/locking/seqlock.rst */ -#ifdef CONFIG_LOCKDEP +/* + * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot + * disable preemption. It can lead to higher latencies, and the write side + * sections will not be able to acquire locks which become sleeping locks + * (e.g. spinlock_t). + * + * To remain preemptible while avoiding a possible livelock caused by the + * reader preempting the writer, use a different technique: let the reader + * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the + * case, acquire then release the associated LOCKNAME writer serialization + * lock. This will allow any possibly-preempted writer to make progress + * until the end of its writer serialization lock critical section. + * + * This lock-unlock technique must be implemented for all of PREEMPT_RT + * sleeping locks. See Documentation/locking/locktypes.rst + */ +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) #define __SEQ_LOCK(expr) expr #else #define __SEQ_LOCK(expr) @@ -162,10 +179,12 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type - * @preemptible: preemptibility of above lockname + * @preemptible: preemptibility of above locktype * @lockmember: argument for lockdep_assert_held() + * @lockbase: associated lock release function (prefix only) + * @lock_acquire: associated lock acquisition function (full call) */ -#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember) \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ typedef struct seqcount_##lockname { \ seqcount_t seqcount; \ __SEQ_LOCK(locktype *lock); \ @@ -187,13 +206,33 @@ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ - return READ_ONCE(s->seqcount.sequence); \ + unsigned seq = READ_ONCE(s->seqcount.sequence); \ + \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ + return seq; \ + \ + if (preemptible && unlikely(seq & 1)) { \ + __SEQ_LOCK(lock_acquire); \ + __SEQ_LOCK(lockbase##_unlock(s->lock)); \ + \ + /* \ + * Re-read the sequence counter since the (possibly \ + * preempted) writer made progress. \ + */ \ + seq = READ_ONCE(s->seqcount.sequence); \ + } \ + \ + return seq; \ } \ \ static __always_inline bool \ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ { \ - return preemptible; \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ + return preemptible; \ + \ + /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \ + return false; \ } \ \ static __always_inline void \ @@ -226,11 +265,13 @@ static inline void __seqprop_assert(const seqcount_t *s) lockdep_assert_preemption_disabled(); } -SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock) -SEQCOUNT_LOCKNAME(spinlock, spinlock_t, false, s->lock) -SEQCOUNT_LOCKNAME(rwlock, rwlock_t, false, s->lock) -SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock) -SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base) +#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) + +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) +SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL)) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t -- cgit v1.2.3 From 1909760f5fc3f123e47b4e24e0ccdc0fc8f3f106 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 4 Sep 2020 17:32:31 +0200 Subject: seqlock: PREEMPT_RT: Do not starve seqlock_t writers On PREEMPT_RT, seqlock_t is transformed to a sleeping lock that do not disable preemption. A seqlock_t reader can thus preempt its write side section and spin for the enter scheduler tick. If that reader belongs to a real-time scheduling class, it can spin forever and the kernel will livelock. To break this livelock possibility on PREEMPT_RT, implement seqlock_t in terms of "seqcount_spinlock_t" instead of plain "seqcount_t". Beside its pure annotational value, this will leverage the existing seqcount_LOCKNAME_T PREEMPT_RT anti-livelock mechanisms, without adding any extra code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200904153231.11994-6-a.darwish@linutronix.de --- include/linux/seqlock.h | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 2bc95104ab1b..f73c7eb68f27 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -790,13 +790,17 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s) * - Documentation/locking/seqlock.rst */ typedef struct { - struct seqcount seqcount; + /* + * Make sure that readers don't starve writers on PREEMPT_RT: use + * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). + */ + seqcount_spinlock_t seqcount; spinlock_t lock; } seqlock_t; #define __SEQLOCK_UNLOCKED(lockname) \ { \ - .seqcount = SEQCNT_ZERO(lockname), \ + .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } @@ -806,8 +810,8 @@ typedef struct { */ #define seqlock_init(sl) \ do { \ - seqcount_init(&(sl)->seqcount); \ spin_lock_init(&(sl)->lock); \ + seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock); \ } while (0) /** @@ -854,6 +858,12 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) return read_seqcount_retry(&sl->seqcount, start); } +/* + * For all seqlock_t write side functions, use write_seqcount_*t*_begin() + * instead of the generic write_seqcount_begin(). This way, no redundant + * lockdep_assert_held() checks are added. + */ + /** * write_seqlock() - start a seqlock_t write side critical section * @sl: Pointer to seqlock_t @@ -870,7 +880,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_t_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount.seqcount); } /** @@ -882,7 +892,7 @@ static inline void write_seqlock(seqlock_t *sl) */ static inline void write_sequnlock(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount.seqcount); spin_unlock(&sl->lock); } @@ -896,7 +906,7 @@ static inline void write_sequnlock(seqlock_t *sl) static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_t_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount.seqcount); } /** @@ -909,7 +919,7 @@ static inline void write_seqlock_bh(seqlock_t *sl) */ static inline void write_sequnlock_bh(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount.seqcount); spin_unlock_bh(&sl->lock); } @@ -923,7 +933,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_t_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount.seqcount); } /** @@ -935,7 +945,7 @@ static inline void write_seqlock_irq(seqlock_t *sl) */ static inline void write_sequnlock_irq(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount.seqcount); spin_unlock_irq(&sl->lock); } @@ -944,7 +954,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_t_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount.seqcount); return flags; } @@ -973,7 +983,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { - write_seqcount_t_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount.seqcount); spin_unlock_irqrestore(&sl->lock, flags); } -- cgit v1.2.3 From 44fae179ce73a26733d9e2d346da4e1a1cb94647 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 21 Aug 2020 12:57:53 -0700 Subject: perf/core: Pull pmu::sched_task() into perf_event_context_sched_out() The pmu::sched_task() is a context switch callback. It passes the cpuctx->task_ctx as a parameter to the lower code. To find the cpuctx->task_ctx, the current code iterates a cpuctx list. The same context will iterated in perf_event_context_sched_out() soon. Share the cpuctx->task_ctx can avoid the unnecessary iteration of the cpuctx list. The pmu::sched_task() is also required for the optimization case for equivalent contexts. The task_ctx_sched_out() will eventually disable and reenable the PMU when schedule out events. Add perf_pmu_disable() and perf_pmu_enable() around task_ctx_sched_out() don't break anything. Drop the cpuctx->ctx.lock for the pmu::sched_task(). The lock is for per-CPU context, which is not necessary for the per-task context schedule. No one uses sched_cb_entry, perf_sched_cb_usages, sched_cb_list, and perf_pmu_sched_task() any more. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200821195754.20159-2-kan.liang@linux.intel.com --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 46a3974eb4fe..0c19d279b97f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -872,7 +872,6 @@ struct perf_cpu_context { struct list_head cgrp_cpuctx_entry; #endif - struct list_head sched_cb_entry; int sched_cb_usage; int online; -- cgit v1.2.3 From 6bbdd563ee9a6078725727571586c66c8613db64 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 3 Mar 2020 14:58:21 -0500 Subject: dax: Create a range version of dax_layout_busy_page() virtiofs device has a range of memory which is mapped into file inodes using dax. This memory is mapped in qemu on host and maps different sections of real file on host. Size of this memory is limited (determined by administrator) and depending on filesystem size, we will soon reach a situation where all the memory is in use and we need to reclaim some. As part of reclaim process, we will need to make sure that there are no active references to pages (taken by get_user_pages()) on the memory range we are trying to reclaim. I am planning to use dax_layout_busy_page() for this. But in current form this is per inode and scans through all the pages of the inode. We want to reclaim only a portion of memory (say 2MB page). So we want to make sure that only that 2MB range of pages do not have any references (and don't want to unmap all the pages of inode). Hence, create a range version of this function named dax_layout_busy_page_range() which can be used to pass a range which needs to be unmapped. Cc: Dan Williams Cc: linux-nvdimm@lists.01.org Cc: Jan Kara Cc: Vishal L Verma Cc: "Weiny, Ira" Signed-off-by: Vivek Goyal Reviewed-by: Jan Kara Signed-off-by: Miklos Szeredi --- include/linux/dax.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 6904d4e0b2e0..9016929db4c6 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -141,6 +141,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); +struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); #else @@ -171,6 +172,11 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping) return NULL; } +static inline struct page *dax_layout_busy_page_range(struct address_space *mapping, pgoff_t start, pgoff_t nr_pages) +{ + return NULL; +} + static inline int dax_writeback_mapping_range(struct address_space *mapping, struct dax_device *dax_dev, struct writeback_control *wbc) { -- cgit v1.2.3 From 61b82bbf693ecd307550ee64a8af192e8e33c46c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Sep 2020 20:31:04 +0300 Subject: swiotlb: Declare swiotlb_late_init_with_default_size() in header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiler is not happy about one function prototype: CC kernel/dma/swiotlb.o kernel/dma/swiotlb.c:275:1: warning: no previous prototype for ‘swiotlb_late_init_with_default_size’ [-Wmissing-prototypes] 275 | swiotlb_late_init_with_default_size(size_t default_size) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Since it's used outside of the module, move its declaration to the header from the user. Signed-off-by: Andy Shevchenko Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 046bb94bd4d6..513913ff7486 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -34,6 +34,7 @@ int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); +extern int swiotlb_late_init_with_default_size(size_t default_size); extern void __init swiotlb_update_mem_attributes(void); /* -- cgit v1.2.3 From 95f6f3a46fc4ee1a2b216a6b46bdf2b450f1877f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:29 +0200 Subject: block: add a bdev_check_media_change helper Like check_disk_changed, except that it does not call ->revalidate_disk but leaves that to the caller. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c618b27292fc..322d48a20772 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,6 @@ extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); -extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; @@ -372,6 +371,7 @@ void unregister_blkdev(unsigned int major, const char *name); void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); +bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); -- cgit v1.2.3 From fec2cf607ba9305770436b1e5c485963a9f0a7bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:42 +0200 Subject: ide-gd: stop using the disk events mechanism ide-gd is only using the disk events mechanism to be able to force an invalidation and partition scan on opening removable media. Just open code the logic without invoving the block layer. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index a254841bd315..62653769509f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -490,8 +490,6 @@ enum { IDE_DFLAG_NOPROBE = BIT(9), /* need to do check_media_change() */ IDE_DFLAG_REMOVABLE = BIT(10), - /* needed for removable devices */ - IDE_DFLAG_ATTACH = BIT(11), IDE_DFLAG_FORCED_GEOM = BIT(12), /* disallow setting unmask bit */ IDE_DFLAG_NO_UNMASK = BIT(13), -- cgit v1.2.3 From b92b53079aedbfb56bbb9ea360e5119fb563a2a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:47 +0200 Subject: block: remove check_disk_change Remove the now unused check_disk_change helper. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 322d48a20772..1c97cf84f011 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -370,7 +370,6 @@ int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); void revalidate_disk_size(struct gendisk *disk, bool verbose); -int check_disk_change(struct block_device *bdev); bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); -- cgit v1.2.3 From 00089c048eb4a8250325efb32a2724fd0da68cce Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:25 +0100 Subject: objtool: Rename frame.h -> objtool.h Header frame.h is getting more code annotations to help objtool analyze object files. Rename the file to objtool.h. [ jpoimboe: add objtool.h to MAINTAINERS ] Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- include/linux/frame.h | 35 ----------------------------------- include/linux/objtool.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 35 deletions(-) delete mode 100644 include/linux/frame.h create mode 100644 include/linux/objtool.h (limited to 'include/linux') diff --git a/include/linux/frame.h b/include/linux/frame.h deleted file mode 100644 index 303cda600e56..000000000000 --- a/include/linux/frame.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_FRAME_H -#define _LINUX_FRAME_H - -#ifdef CONFIG_STACK_VALIDATION -/* - * This macro marks the given function's stack frame as "non-standard", which - * tells objtool to ignore the function when doing stack metadata validation. - * It should only be used in special cases where you're 100% sure it won't - * affect the reliability of frame pointers and kernel stack traces. - * - * For more information, see tools/objtool/Documentation/stack-validation.txt. - */ -#define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(.discard.func_stack_frame_non_standard) \ - *__func_stack_frame_non_standard_##func = func - -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. - */ -#define ANNOTATE_INTRA_FUNCTION_CALL \ - 999: \ - .pushsection .discard.intra_function_calls; \ - .long 999b; \ - .popsection; - -#else /* !CONFIG_STACK_VALIDATION */ - -#define STACK_FRAME_NON_STANDARD(func) -#define ANNOTATE_INTRA_FUNCTION_CALL - -#endif /* CONFIG_STACK_VALIDATION */ - -#endif /* _LINUX_FRAME_H */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h new file mode 100644 index 000000000000..358175c9c2b5 --- /dev/null +++ b/include/linux/objtool.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_OBJTOOL_H +#define _LINUX_OBJTOOL_H + +#ifdef CONFIG_STACK_VALIDATION +/* + * This macro marks the given function's stack frame as "non-standard", which + * tells objtool to ignore the function when doing stack metadata validation. + * It should only be used in special cases where you're 100% sure it won't + * affect the reliability of frame pointers and kernel stack traces. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. + */ +#define STACK_FRAME_NON_STANDARD(func) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ + *__func_stack_frame_non_standard_##func = func + +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL \ + 999: \ + .pushsection .discard.intra_function_calls; \ + .long 999b; \ + .popsection; + +#else /* !CONFIG_STACK_VALIDATION */ + +#define STACK_FRAME_NON_STANDARD(func) +#define ANNOTATE_INTRA_FUNCTION_CALL + +#endif /* CONFIG_STACK_VALIDATION */ + +#endif /* _LINUX_OBJTOOL_H */ -- cgit v1.2.3 From 5567c6c39f3404e4492c18c0c1abff5556684f6e Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:26 +0100 Subject: objtool: Only include valid definitions depending on source file type Header include/linux/objtool.h contains both C and assembly definition that are visible regardless of the file including them. Place definition under conditional __ASSEMBLY__. Reviewed-by: Miroslav Benes Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- include/linux/objtool.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 358175c9c2b5..15e9997a9fb4 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -3,6 +3,8 @@ #define _LINUX_OBJTOOL_H #ifdef CONFIG_STACK_VALIDATION + +#ifndef __ASSEMBLY__ /* * This macro marks the given function's stack frame as "non-standard", which * tells objtool to ignore the function when doing stack metadata validation. @@ -15,6 +17,8 @@ static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func +#else /* __ASSEMBLY__ */ + /* * This macro indicates that the following intra-function call is valid. * Any non-annotated intra-function call will cause objtool to issue a warning. @@ -25,6 +29,8 @@ .long 999b; \ .popsection; +#endif /* __ASSEMBLY__ */ + #else /* !CONFIG_STACK_VALIDATION */ #define STACK_FRAME_NON_STANDARD(func) -- cgit v1.2.3 From ee819aedf34a8f35cd54ee3967c7beb4d1d4a635 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Sep 2020 16:30:27 +0100 Subject: objtool: Make unwind hint definitions available to other architectures Unwind hints are useful to provide objtool with information about stack states in non-standard functions/code. While the type of information being provided might be very arch specific, the mechanism to provide the information can be useful for other architectures. Move the relevant unwint hint definitions for all architectures to see. [ jpoimboe: REGS_IRET -> REGS_PARTIAL ] Signed-off-by: Julien Thierry Signed-off-by: Josh Poimboeuf --- include/linux/objtool.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 15e9997a9fb4..ab82c793c897 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -2,9 +2,55 @@ #ifndef _LINUX_OBJTOOL_H #define _LINUX_OBJTOOL_H +#ifndef __ASSEMBLY__ + +#include + +/* + * This struct is used by asm and inline asm code to manually annotate the + * location of registers on the stack. + */ +struct unwind_hint { + u32 ip; + s16 sp_offset; + u8 sp_reg; + u8 type; + u8 end; +}; +#endif + +/* + * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP + * (the caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset + * points to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that + * sp_reg+sp_offset points to the iret return frame. + */ +#define UNWIND_HINT_TYPE_CALL 0 +#define UNWIND_HINT_TYPE_REGS 1 +#define UNWIND_HINT_TYPE_REGS_PARTIAL 2 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 + #ifdef CONFIG_STACK_VALIDATION #ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "987: \n\t" \ + ".pushsection .discard.unwind_hints\n\t" \ + /* struct unwind_hint */ \ + ".long 987b - .\n\t" \ + ".short " __stringify(sp_offset) "\n\t" \ + ".byte " __stringify(sp_reg) "\n\t" \ + ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(end) "\n\t" \ + ".balign 4 \n\t" \ + ".popsection\n\t" + /* * This macro marks the given function's stack frame as "non-standard", which * tells objtool to ignore the function when doing stack metadata validation. @@ -29,12 +75,54 @@ .long 999b; \ .popsection; +/* + * In asm, there are two kinds of code: normal C-type callable functions and + * the rest. The normal callable functions can be called by other code, and + * don't do anything unusual with the stack. Such normal callable functions + * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * category. In this case, no special debugging annotations are needed because + * objtool can automatically generate the ORC data for the ORC unwinder to read + * at runtime. + * + * Anything which doesn't fall into the above category, such as syscall and + * interrupt handlers, tends to not be called directly by other functions, and + * often does unusual non-C-function-type things with the stack pointer. Such + * code needs to be annotated such that objtool can understand it. The + * following CFI hint macros are for this type of code. + * + * These macros provide hints to objtool about the state of the stack at each + * instruction. Objtool starts from the hints and follows the code flow, + * making automatic CFI adjustments when it sees pushes and pops, filling out + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ + .long .Lunwind_hint_ip_\@ - . + .short \sp_offset + .byte \sp_reg + .byte \type + .byte \end + .balign 4 + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ +#ifndef __ASSEMBLY__ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#else #define ANNOTATE_INTRA_FUNCTION_CALL +.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.endm +#endif #endif /* CONFIG_STACK_VALIDATION */ -- cgit v1.2.3 From 0feea33d79825d05b5ede30947db4df34722b463 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 22 Jul 2020 15:01:20 -0700 Subject: soc: qcom-geni-se: Don't use relaxed writes when writing commands Writing the command is the final step in kicking off a transfer. Let's use writel() to ensure that any other memory accesses are done before the command kicks off. It's expected that this is mostly relevant if we're in DMA mode but since it doesn't appear to regress performance in a measurable way [1] even in PIO mode and it's easier to reason about then let's just always use it. NOTE: this patch came about due to code inspection. No actual problems were observed that this patch fixes. [1] Tested by timing "flashrom -p ec" on a Chromebook which stresses GENI SPI a lot. Reviewed-by: Mukesh Kumar Savaliya Reviewed-by: Akash Asthana Reviewed-by: Stephen Boyd Suggested-by: Stephen Boyd Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20200722150113.1.Ia50ab5cb8a6d3a73d302e6bdc25542d48ffd27f4@changeid Signed-off-by: Bjorn Andersson --- include/linux/qcom-geni-se.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index 8f385fbe5a0e..ae4a8a766b69 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -296,7 +296,7 @@ static inline void geni_se_setup_m_cmd(struct geni_se *se, u32 cmd, u32 params) u32 m_cmd; m_cmd = (cmd << M_OPCODE_SHFT) | (params & M_PARAMS_MSK); - writel_relaxed(m_cmd, se->base + SE_GENI_M_CMD0); + writel(m_cmd, se->base + SE_GENI_M_CMD0); } /** @@ -316,7 +316,7 @@ static inline void geni_se_setup_s_cmd(struct geni_se *se, u32 cmd, u32 params) s_cmd &= ~(S_OPCODE_MSK | S_PARAMS_MSK); s_cmd |= (cmd << S_OPCODE_SHFT); s_cmd |= (params & S_PARAMS_MSK); - writel_relaxed(s_cmd, se->base + SE_GENI_S_CMD0); + writel(s_cmd, se->base + SE_GENI_S_CMD0); } /** -- cgit v1.2.3 From 5198d545dba8ad893f5e5a029ca8d43ee7bcf011 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 9 Sep 2020 10:37:51 -0700 Subject: net: remove napi_hash_del() from driver-facing API We allow drivers to call napi_hash_del() before calling netif_napi_del() to batch RCU grace periods. This makes the API asymmetric and leaks internal implementation details. Soon we will want the grace period to protect more than just the NAPI hash table. Restructure the API and have drivers call a new function - __netif_napi_del() if they want to take care of RCU waits. Note that only core was checking the return status from napi_hash_del() so the new helper does not report if the NAPI was actually deleted. Some notes on driver oddness: - veth observed the grace period before calling netif_napi_del() but that should not matter - myri10ge observed normal RCU flavor - bnx2x and enic did not actually observe the grace period (unless they did so implicitly) - virtio_net and enic only unhashed Rx NAPIs The last two points seem to indicate that the calls to napi_hash_del() were a left over rather than an optimization. Regardless, it's easy enough to correct them. This patch may introduce extra synchronize_net() calls for interfaces which set NAPI_STATE_NO_BUSY_POLL and depend on free_netdev() to call netif_napi_del(). This seems inevitable since we want to use RCU for netpoll dev->napi_list traversal, and almost no drivers set IFF_DISABLE_NETPOLL. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7f9fcfd15942..74ed95215091 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -70,6 +70,7 @@ struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; +void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -488,20 +489,6 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } -/** - * napi_hash_del - remove a NAPI from global table - * @napi: NAPI context - * - * Warning: caller must observe RCU grace period - * before freeing memory containing @napi, if - * this function returns true. - * Note: core networking stack automatically calls it - * from netif_napi_del(). - * Drivers might want to call this helper to combine all - * the needed RCU grace periods into a single one. - */ -bool napi_hash_del(struct napi_struct *napi); - /** * napi_disable - prevent NAPI from scheduling * @n: NAPI context @@ -2367,13 +2354,27 @@ static inline void netif_tx_napi_add(struct net_device *dev, netif_napi_add(dev, napi, poll, weight); } +/** + * __netif_napi_del - remove a NAPI context + * @napi: NAPI context + * + * Warning: caller must observe RCU grace period before freeing memory + * containing @napi. Drivers might want to call this helper to combine + * all the needed RCU grace periods into a single one. + */ +void __netif_napi_del(struct napi_struct *napi); + /** * netif_napi_del - remove a NAPI context * @napi: NAPI context * * netif_napi_del() removes a NAPI context from the network device NAPI list */ -void netif_napi_del(struct napi_struct *napi); +static inline void netif_napi_del(struct napi_struct *napi) +{ + __netif_napi_del(napi); + synchronize_net(); +} struct napi_gro_cb { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ @@ -2797,7 +2798,6 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); -void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, -- cgit v1.2.3 From 4d092dd2041a5f328410ad16d63b8606c24e8604 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 9 Sep 2020 10:37:52 -0700 Subject: net: manage napi add/del idempotence explicitly To RCUify napi->dev_list we need to replace list_del_init() with list_del_rcu(). There is no _init() version for RCU for obvious reasons. Up until now netif_napi_del() was idempotent so to make sure it remains such add a bit which is set when NAPI is listed, and cleared when it removed. Since we don't expect multiple calls to netif_napi_add() to be correct, add a warning on that side. Now that napi_hash_add / napi_hash_del are only called by napi_add / del we can actually steal its bit. We just need to make sure hash node is initialized correctly. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 74ed95215091..157e0242e9ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -355,7 +355,7 @@ enum { NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ - NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ + NAPI_STATE_LISTED, /* NAPI added to system lists */ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ }; @@ -365,7 +365,7 @@ enum { NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), - NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), }; -- cgit v1.2.3 From e9b12edc133b54e15ecd105620d51fb8e8fa8bde Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 9 Sep 2020 17:50:46 -0700 Subject: tcp: record received TOS value in the request socket A new field is added to the request sock to record the TOS value received on the listening socket during 3WHS: When not under syn flood, it is recording the TOS value sent in SYN. When under syn flood, it is recording the TOS value sent in the ACK. This is a preparation patch in order to do TOS reflection in the later commit. Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 56ff2952edaf..2f87377e9af7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -134,6 +134,7 @@ struct tcp_request_sock { * FastOpen it's the seq# * after data-in-SYN. */ + u8 syn_tos; }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) -- cgit v1.2.3 From 3d7bfea8b8378277a25b42b28fe5a2a5ca76a7cf Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 8 Jul 2020 02:12:34 -0700 Subject: unicode: Add utf8_casefold_hash This adds a case insensitive hash function to allow taking the hash without needing to allocate a casefolded copy of the string. The existing d_hash implementations for casefolding allocate memory within rcu-walk, by avoiding it we can be more efficient and avoid worrying about a failed allocation. Signed-off-by: Daniel Rosenberg Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- include/linux/unicode.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d8049..74484d44c755 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um); -- cgit v1.2.3 From c843843e714c8f17280d7db009412b1b1baf448b Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 8 Jul 2020 02:12:35 -0700 Subject: fs: Add standard casefolding support This adds general supporting functions for filesystems that use utf8 casefolding. It provides standard dentry_operations and adds the necessary structures in struct super_block to allow this standardization. The new dentry operations are functionally equivalent to the existing operations in ext4 and f2fs, apart from the use of utf8_casefold_hash to avoid an allocation. By providing a common implementation, all users can benefit from any optimizations without needing to port over improvements. Signed-off-by: Daniel Rosenberg Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- include/linux/fs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..bc5417c61e12 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1371,6 +1371,12 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_ACTIVE (1<<30) #define SB_NOUSER (1<<31) +/* These flags relate to encoding and casefolding */ +#define SB_ENC_STRICT_MODE_FL (1 << 0) + +#define sb_has_strict_encoding(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + /* * Umount options */ @@ -1440,6 +1446,10 @@ struct super_block { #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; +#endif +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ @@ -3262,6 +3272,12 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); +#ifdef CONFIG_UNICODE +extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); +#endif + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, -- cgit v1.2.3 From eca4873ee1b63ee051e0eed91099fa42c97b2438 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 8 Jul 2020 02:12:36 -0700 Subject: f2fs: Use generic casefolding support This switches f2fs over to the generic support provided in the previous patch. Since casefolded dentries behave the same in ext4 and f2fs, we decrease the maintenance burden by unifying them, and any optimizations will immediately apply to both. Signed-off-by: Daniel Rosenberg Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 3c383ddd92dd..a5dbb57a687f 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -38,9 +38,6 @@ #define F2FS_MAX_QUOTAS 3 #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_ENC_STRICT_MODE_FL (1 << 0) -#define f2fs_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL) #define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ -- cgit v1.2.3 From d66423fbe11e141206f117b232916aa899d44959 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 10 Sep 2020 12:02:48 +0100 Subject: bpf: Plug hole in struct bpf_sk_lookup_kern As Alexei points out, struct bpf_sk_lookup_kern has two 4-byte holes. This leads to suboptimal instructions being generated (IPv4, x86): 1372 struct bpf_sk_lookup_kern ctx = { 0xffffffff81b87f30 <+624>: xor %eax,%eax 0xffffffff81b87f32 <+626>: mov $0x6,%ecx 0xffffffff81b87f37 <+631>: lea 0x90(%rsp),%rdi 0xffffffff81b87f3f <+639>: movl $0x110002,0x88(%rsp) 0xffffffff81b87f4a <+650>: rep stos %rax,%es:(%rdi) 0xffffffff81b87f4d <+653>: mov 0x8(%rsp),%eax 0xffffffff81b87f51 <+657>: mov %r13d,0x90(%rsp) 0xffffffff81b87f59 <+665>: incl %gs:0x7e4970a0(%rip) 0xffffffff81b87f60 <+672>: mov %eax,0x8c(%rsp) 0xffffffff81b87f67 <+679>: movzwl 0x10(%rsp),%eax 0xffffffff81b87f6c <+684>: mov %ax,0xa8(%rsp) 0xffffffff81b87f74 <+692>: movzwl 0x38(%rsp),%eax 0xffffffff81b87f79 <+697>: mov %ax,0xaa(%rsp) Fix this by moving around sport and dport. pahole confirms there are no more holes: struct bpf_sk_lookup_kern { u16 family; /* 0 2 */ u16 protocol; /* 2 2 */ __be16 sport; /* 4 2 */ u16 dport; /* 6 2 */ struct { __be32 saddr; /* 8 4 */ __be32 daddr; /* 12 4 */ } v4; /* 8 8 */ struct { const struct in6_addr * saddr; /* 16 8 */ const struct in6_addr * daddr; /* 24 8 */ } v6; /* 16 16 */ struct sock * selected_sk; /* 32 8 */ bool no_reuseport; /* 40 1 */ /* size: 48, cachelines: 1, members: 8 */ /* padding: 7 */ /* last cacheline: 48 bytes */ }; The assembly also doesn't contain the pesky rep stos anymore: 1372 struct bpf_sk_lookup_kern ctx = { 0xffffffff81b87f60 <+624>: movzwl 0x10(%rsp),%eax 0xffffffff81b87f65 <+629>: movq $0x0,0xa8(%rsp) 0xffffffff81b87f71 <+641>: movq $0x0,0xb0(%rsp) 0xffffffff81b87f7d <+653>: mov %ax,0x9c(%rsp) 0xffffffff81b87f85 <+661>: movzwl 0x38(%rsp),%eax 0xffffffff81b87f8a <+666>: movq $0x0,0xb8(%rsp) 0xffffffff81b87f96 <+678>: mov %ax,0x9e(%rsp) 0xffffffff81b87f9e <+686>: mov 0x8(%rsp),%eax 0xffffffff81b87fa2 <+690>: movq $0x0,0xc0(%rsp) 0xffffffff81b87fae <+702>: movl $0x110002,0x98(%rsp) 0xffffffff81b87fb9 <+713>: mov %eax,0xa0(%rsp) 0xffffffff81b87fc0 <+720>: mov %r13d,0xa4(%rsp) 1: https://lore.kernel.org/bpf/CAADnVQKE6y9h2fwX6OS837v-Uf+aBXnT_JXiN_bbo2gitZQ3tA@mail.gmail.com/ Fixes: e9ddbb7707ff ("bpf: Introduce SK_LOOKUP program type with a dedicated attach point") Suggested-by: Alexei Starovoitov Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20200910110248.198326-1-lmb@cloudflare.com --- include/linux/filter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 995625950cc1..e962dd8117d8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1287,6 +1287,8 @@ int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); struct bpf_sk_lookup_kern { u16 family; u16 protocol; + __be16 sport; + u16 dport; struct { __be32 saddr; __be32 daddr; @@ -1295,8 +1297,6 @@ struct bpf_sk_lookup_kern { const struct in6_addr *saddr; const struct in6_addr *daddr; } v6; - __be16 sport; - u16 dport; struct sock *selected_sk; bool no_reuseport; }; -- cgit v1.2.3 From e33d2a7b3041d7f8cd1f0a2a4ca42a5bc112b14e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 31 Aug 2020 18:16:45 +0300 Subject: SUNRPC: remove RC4-HMAC-MD5 support from KerberosV The RC4-HMAC-MD5 KerberosV algorithm is based on RFC 4757 [0], which was specifically issued for interoperability with Windows 2000, but was never intended to receive the same level of support. The RFC says The IETF Kerberos community supports publishing this specification as an informational document in order to describe this widely implemented technology. However, while these encryption types provide the operations necessary to implement the base Kerberos specification [RFC4120], they do not provide all the required operations in the Kerberos cryptography framework [RFC3961]. As a result, it is not generally possible to implement potential extensions to Kerberos using these encryption types. The Kerberos encryption type negotiation mechanism [RFC4537] provides one approach for using such extensions even when a Kerberos infrastructure uses long-term RC4 keys. Because this specification does not implement operations required by RFC 3961 and because of security concerns with the use of RC4 and MD4 discussed in Section 8, this specification is not appropriate for publication on the standards track. The RC4-HMAC encryption types are used to ease upgrade of existing Windows NT environments, provide strong cryptography (128-bit key lengths), and provide exportable (meet United States government export restriction requirements) encryption. This document describes the implementation of those encryption types. Furthermore, this RFC was re-classified as 'historic' by RFC 8429 [1] in 2018, stating that 'none of the encryption types it specifies should be used' Note that other outdated algorithms are left in place (some of which are guarded by CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES), so this should only adversely affect interoperability with Windows NT/2000 systems that have not received any updates since 2008 (but are connected to a network nonetheless) [0] https://tools.ietf.org/html/rfc4757 [1] https://tools.ietf.org/html/rfc8429 Signed-off-by: Ard Biesheuvel Acked-by: J. Bruce Fields Signed-off-by: Herbert Xu --- include/linux/sunrpc/gss_krb5.h | 11 ----------- include/linux/sunrpc/gss_krb5_enctypes.h | 9 +++------ 2 files changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index e8f8ffe7448b..91f43d86879d 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -141,14 +141,12 @@ enum sgn_alg { SGN_ALG_MD2_5 = 0x0001, SGN_ALG_DES_MAC = 0x0002, SGN_ALG_3 = 0x0003, /* not published */ - SGN_ALG_HMAC_MD5 = 0x0011, /* microsoft w2k; no support */ SGN_ALG_HMAC_SHA1_DES3_KD = 0x0004 }; enum seal_alg { SEAL_ALG_NONE = 0xffff, SEAL_ALG_DES = 0x0000, SEAL_ALG_1 = 0x0001, /* not published */ - SEAL_ALG_MICROSOFT_RC4 = 0x0010,/* microsoft w2k; no support */ SEAL_ALG_DES3KD = 0x0002 }; @@ -316,14 +314,5 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen); -int -krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, - struct crypto_sync_skcipher *cipher, - unsigned char *cksum); - -int -krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, - struct crypto_sync_skcipher *cipher, - s32 seqnum); void gss_krb5_make_confounder(char *p, u32 conflen); diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h index 981c89cef19d..87eea679d750 100644 --- a/include/linux/sunrpc/gss_krb5_enctypes.h +++ b/include/linux/sunrpc/gss_krb5_enctypes.h @@ -13,15 +13,13 @@ #ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES /* - * NB: This list includes encryption types that were deprecated - * by RFC 8429 (DES3_CBC_SHA1 and ARCFOUR_HMAC). + * NB: This list includes DES3_CBC_SHA1, which was deprecated by RFC 8429. * * ENCTYPE_AES256_CTS_HMAC_SHA1_96 * ENCTYPE_AES128_CTS_HMAC_SHA1_96 * ENCTYPE_DES3_CBC_SHA1 - * ENCTYPE_ARCFOUR_HMAC */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23" +#define KRB5_SUPPORTED_ENCTYPES "18,17,16" #else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ @@ -32,12 +30,11 @@ * ENCTYPE_AES256_CTS_HMAC_SHA1_96 * ENCTYPE_AES128_CTS_HMAC_SHA1_96 * ENCTYPE_DES3_CBC_SHA1 - * ENCTYPE_ARCFOUR_HMAC * ENCTYPE_DES_CBC_MD5 * ENCTYPE_DES_CBC_CRC * ENCTYPE_DES_CBC_MD4 */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" +#define KRB5_SUPPORTED_ENCTYPES "18,17,16,3,1,2" #endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ -- cgit v1.2.3 From 6d0fd536183034953bf84826fecb37e47779d24b Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 28 Aug 2020 10:20:31 -0700 Subject: include: pe.h: Add RISC-V related PE definition Define RISC-V related machine types. Signed-off-by: Atish Patra Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20200415195422.19866-3-atish.patra@wdc.com Signed-off-by: Ard Biesheuvel --- include/linux/pe.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pe.h b/include/linux/pe.h index 8ad71d763a77..daf09ffffe38 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -55,6 +55,9 @@ #define IMAGE_FILE_MACHINE_POWERPC 0x01f0 #define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1 #define IMAGE_FILE_MACHINE_R4000 0x0166 +#define IMAGE_FILE_MACHINE_RISCV32 0x5032 +#define IMAGE_FILE_MACHINE_RISCV64 0x5064 +#define IMAGE_FILE_MACHINE_RISCV128 0x5128 #define IMAGE_FILE_MACHINE_SH3 0x01a2 #define IMAGE_FILE_MACHINE_SH3DSP 0x01a3 #define IMAGE_FILE_MACHINE_SH3E 0x01a4 -- cgit v1.2.3 From abdaf11ac18925ce8cc229e62e35b342d548ece2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 16:41:50 +0200 Subject: dma-mapping: add (back) arch_dma_mark_clean for ia64 Add back a hook to optimize dcache flushing after reading executable code using DMA. This gets ia64 out of the business of pretending to be dma incoherent just for this optimization. Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 3 +++ include/linux/dma-noncoherent.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 6e87225600ae..95e3e28bd93f 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -150,6 +150,9 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, size); } static inline dma_addr_t dma_direct_map_page(struct device *dev, diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index ca09a4e07d2d..b9bc6c557ea4 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -108,6 +108,14 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ +#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN +void arch_dma_mark_clean(phys_addr_t paddr, size_t size); +#else +static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) +{ +} +#endif /* ARCH_HAS_DMA_MARK_CLEAN */ + void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); -- cgit v1.2.3 From 2f5388a29be82a62529d146499e70987e856f6f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 17:06:40 +0200 Subject: dma-direct: remove dma_direct_{alloc,free}_pages Just merge these helpers into the main dma_direct_{alloc,free} routines, as the additional checks are always false for the two callers. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-direct.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 95e3e28bd93f..20eceb2e4f91 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -77,10 +77,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -void *dma_direct_alloc_pages(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); -- cgit v1.2.3 From 7bc5c428a660d4d1bc95ba54bf4cb6bccf8c3029 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 17:56:22 +0200 Subject: dma-direct: remove __dma_to_phys There is no harm in just always clearing the SME encryption bit, while significantly simplifying the interface. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-direct.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 20eceb2e4f91..f00e262ab6b1 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -24,11 +24,12 @@ static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) { - phys_addr_t paddr = (phys_addr_t)dev_addr; + phys_addr_t paddr = (phys_addr_t)dev_addr + + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); - return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + return __sme_clr(paddr); } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ @@ -44,7 +45,7 @@ static inline bool force_dma_unencrypted(struct device *dev) /* * If memory encryption is supported, phys_to_dma will set the memory encryption * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma - * and __dma_to_phys versions should only be used on non-encrypted memory for + * version should only be used on non-encrypted memory for * special occasions like DMA coherent buffers. */ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) @@ -52,11 +53,6 @@ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return __sme_set(__phys_to_dma(dev, paddr)); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(__dma_to_phys(dev, daddr)); -} - static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, bool is_ram) { -- cgit v1.2.3 From 5ceda74093a5c1c3f42a02b894df031f3bbc9af1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 17:34:03 +0200 Subject: dma-direct: rename and cleanup __phys_to_dma The __phys_to_dma vs phys_to_dma distinction isn't exactly obvious. Try to improve the situation by renaming __phys_to_dma to phys_to_dma_unencryped, and not forcing architectures that want to override phys_to_dma to actually provide __phys_to_dma. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-direct.h | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index f00e262ab6b1..805010ea5346 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -16,14 +16,29 @@ extern unsigned int zone_dma_bits; #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include +#ifndef phys_to_dma_unencrypted +#define phys_to_dma_unencrypted phys_to_dma +#endif #else -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, + phys_addr_t paddr) { dma_addr_t dev_addr = (dma_addr_t)paddr; return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } +/* + * If memory encryption is supported, phys_to_dma will set the memory encryption + * bit in the DMA address, and dma_to_phys will clear it. + * phys_to_dma_unencrypted is for use on special unencrypted memory like swiotlb + * buffers. + */ +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(phys_to_dma_unencrypted(dev, paddr)); +} + static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) { phys_addr_t paddr = (phys_addr_t)dev_addr + @@ -42,17 +57,6 @@ static inline bool force_dma_unencrypted(struct device *dev) } #endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */ -/* - * If memory encryption is supported, phys_to_dma will set the memory encryption - * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma - * version should only be used on non-encrypted memory for - * special occasions like DMA coherent buffers. - */ -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(__phys_to_dma(dev, paddr)); -} - static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, bool is_ram) { -- cgit v1.2.3 From b3003a74456f0c1f614a46c07e16abe33bfdd087 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 4 Sep 2020 19:41:57 -0300 Subject: RDMA/qedr: Remove fbo and zbva from the MR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit zbva is always false, so fbo is never read. A 'zero-based-virtual-address' is simply IOVA == 0, and the driver already supports this. Link: https://lore.kernel.org/r/16-v2-270386b7e60b+28f4-umem_1_jgg@nvidia.com Acked-by: Michal Kalderon  Signed-off-by: Jason Gunthorpe --- include/linux/qed/qed_rdma_if.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index f464d85e88a4..aeb242cefebf 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -242,10 +242,8 @@ struct qed_rdma_register_tid_in_params { bool pbl_two_level; u8 pbl_page_size_log; u8 page_size_log; - u32 fbo; u64 length; u64 vaddr; - bool zbva; bool phy_mr; bool dma_mr; -- cgit v1.2.3 From 97fb3e33474897c35b7cfe6ff0e16a3e172d9249 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 2 Sep 2020 19:57:38 +0300 Subject: qede: Notify qedr when mtu has changed MTU change on ethtool is currently not supported for iWARP. Notify qedr driver so that appropriate logging can take place. Link: https://lore.kernel.org/r/20200902165741.8355-6-michal.kalderon@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- include/linux/qed/qede_rdma.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index 072da2f6da37..0d5564a59a59 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -20,7 +20,8 @@ enum qede_rdma_event { QEDE_UP, QEDE_DOWN, QEDE_CHANGE_ADDR, - QEDE_CLOSE + QEDE_CLOSE, + QEDE_CHANGE_MTU, }; struct qede_rdma_event_work { @@ -54,6 +55,7 @@ void qede_rdma_dev_event_open(struct qede_dev *dev); void qede_rdma_dev_event_close(struct qede_dev *dev); void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery); void qede_rdma_event_changeaddr(struct qede_dev *edr); +void qede_rdma_event_change_mtu(struct qede_dev *edev); #else static inline int qede_rdma_dev_add(struct qede_dev *dev, -- cgit v1.2.3 From 78ff97ebd4e9d02a99ca894b643c78c8fda7df5d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 9 Sep 2020 16:10:57 +0200 Subject: iio: iio.h: fix a warning at the kernel-doc markup There's a warning at iio.h kernel-doc markup: ./include/linux/iio/iio.h:644: WARNING: Unknown target name: "devm". Because it is using {devm_}foo notation. Well, this is not a valid kernel-doc notation. Also, it prevents creating hyperlinks to other documentation functions. So, replace it to a better notation. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/d8f2275c438c459ede4e6fba03ce719cc6ad898b.1599660067.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index e2df67a3b9ab..f1daaba9e706 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -641,7 +641,7 @@ static inline struct iio_dev *iio_device_get(struct iio_dev *indio_dev) * * This utility must be called between IIO device allocation * (via devm_iio_device_alloc()) & IIO device registration - * (via {devm_}iio_device_register()). + * (via iio_device_register() and devm_iio_device_register())). * By default, the device allocation will also assign a parent device to * the IIO device object. In cases where devm_iio_device_alloc() is used, * sometimes the parent device must be different than the device used to -- cgit v1.2.3 From 7b26410b05f8c262688de8a689ba8e5d0c3cff01 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 31 Aug 2020 15:27:23 -0700 Subject: block: introduce part_[begin|end]_io_acct These functions can be used to enable iostat for partitions on devices like md, bcache. Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 37ec5a73d027..5bd96fbab9b4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1933,6 +1933,11 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); +unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, + struct bio *bio); +void part_end_io_acct(struct hd_struct *part, struct bio *bio, + unsigned long start_time); + /** * bio_start_io_acct - start I/O accounting for bio based drivers * @bio: bio to start account for -- cgit v1.2.3 From 568a36a69bad4f2efcfa4f94c83aa150a463735c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 10 Sep 2020 19:48:54 +0300 Subject: net: dsa: tag_8021q: include missing refcount.h The previous assumption was that the caller would already have this header file included. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 311aa04e7520..804750122c66 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -5,6 +5,7 @@ #ifndef _NET_DSA_8021Q_H #define _NET_DSA_8021Q_H +#include #include struct dsa_switch; -- cgit v1.2.3 From 7e092af2f3b33694b9117ffd978d42b04ec4f260 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 10 Sep 2020 19:48:55 +0300 Subject: net: dsa: tag_8021q: setup tagging via a single function call There is no point in calling dsa_port_setup_8021q_tagging for each individual port. Additionally, it will become more difficult to do that when we'll have a context structure to tag_8021q (next patch). So refactor this now. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 804750122c66..8586d8cdf956 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -25,8 +25,7 @@ struct dsa_8021q_crosschip_link { #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) -int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, - bool enabled); +int dsa_8021q_setup(struct dsa_switch *ds, bool enabled); int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, struct dsa_switch *other_ds, @@ -57,8 +56,7 @@ bool vid_is_dsa_8021q(u16 vid); #else -int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, - bool enabled) +int dsa_8021q_setup(struct dsa_switch *ds, bool enabled) { return 0; } -- cgit v1.2.3 From 5899ee367ab3fec885aa04d9a2b573bf2e464e7f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 10 Sep 2020 19:48:56 +0300 Subject: net: dsa: tag_8021q: add a context structure While working on another tag_8021q driver implementation, some things became apparent: - It is not mandatory for a DSA driver to offload the tag_8021q VLANs by using the VLAN table per se. For example, it can add custom TCAM rules that simply encapsulate RX traffic, and redirect & decapsulate rules for TX traffic. For such a driver, it makes no sense to receive the tag_8021q configuration through the same callback as it receives the VLAN configuration from the bridge and the 8021q modules. - Currently, sja1105 (the only tag_8021q user) sets a priv->expect_dsa_8021q variable to distinguish between the bridge calling, and tag_8021q calling. That can be improved, to say the least. - The crosschip bridging operations are, in fact, stateful already. The list of crosschip_links must be kept by the caller and passed to the relevant tag_8021q functions. So it would be nice if the tag_8021q configuration was more self-contained. This patch attempts to do that. Create a struct dsa_8021q_context which encapsulates a struct dsa_switch, and has 2 function pointers for adding and deleting a VLAN. These will replace the previous channel to the driver, which was through the .port_vlan_add and .port_vlan_del callbacks of dsa_switch_ops. Also put the list of crosschip_links into this dsa_8021q_context. Drivers that don't support cross-chip bridging can simply omit to initialize this list, as long as they dont call any cross-chip function. The sja1105_vlan_add and sja1105_vlan_del functions are refactored into a smaller sja1105_vlan_add_one, which now has 2 entry points: - sja1105_vlan_add, from struct dsa_switch_ops - sja1105_dsa_8021q_vlan_add, from the tag_8021q ops But even this change is fairly trivial. It just reflects the fact that for sja1105, the VLANs from these 2 channels end up in the same hardware table. However that is not necessarily true in the general sense (and that's the reason for making this change). The rest of the patch is mostly plain refactoring of "ds" -> "ctx". The dsa_8021q_context structure needs to be propagated because adding a VLAN is now done through the ops function pointers inside of it. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 8586d8cdf956..2b003ae9fb38 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -12,30 +12,40 @@ struct dsa_switch; struct sk_buff; struct net_device; struct packet_type; +struct dsa_8021q_context; struct dsa_8021q_crosschip_link { struct list_head list; int port; - struct dsa_switch *other_ds; + struct dsa_8021q_context *other_ctx; int other_port; refcount_t refcount; }; +struct dsa_8021q_ops { + int (*vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); + int (*vlan_del)(struct dsa_switch *ds, int port, u16 vid); +}; + +struct dsa_8021q_context { + const struct dsa_8021q_ops *ops; + struct dsa_switch *ds; + struct list_head crosschip_links; +}; + #define DSA_8021Q_N_SUBVLAN 8 #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) -int dsa_8021q_setup(struct dsa_switch *ds, bool enabled); +int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled); -int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, - struct dsa_switch *other_ds, - int other_port, - struct list_head *crosschip_links); +int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, + struct dsa_8021q_context *other_ctx, + int other_port); -int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, - struct dsa_switch *other_ds, - int other_port, - struct list_head *crosschip_links); +int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, + struct dsa_8021q_context *other_ctx, + int other_port); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); @@ -56,23 +66,21 @@ bool vid_is_dsa_8021q(u16 vid); #else -int dsa_8021q_setup(struct dsa_switch *ds, bool enabled) +int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled) { return 0; } -int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, - struct dsa_switch *other_ds, - int other_port, - struct list_head *crosschip_links) +int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port, + struct dsa_8021q_context *other_ctx, + int other_port) { return 0; } -int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, - struct dsa_switch *other_ds, - int other_port, - struct list_head *crosschip_links) +int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port, + struct dsa_8021q_context *other_ctx, + int other_port) { return 0; } -- cgit v1.2.3 From 1623ad8ec04c771a54975fb84b22bc21c2dbcac1 Mon Sep 17 00:00:00 2001 From: Divya Koppera Date: Fri, 11 Sep 2020 18:48:44 +0530 Subject: net: phy: mchp: Add support for LAN8814 QUAD PHY LAN8814 is a low-power, quad-port triple-speed (10BASE-T/100BASETX/1000BASE-T) Ethernet physical layer transceiver (PHY). It supports transmission and reception of data on standard CAT-5, as well as CAT-5e and CAT-6, unshielded twisted pair (UTP) cables. LAN8814 supports industry-standard QSGMII (Quad Serial Gigabit Media Independent Interface) and Q-USGMII (Quad Universal Serial Gigabit Media Independent Interface) providing chip-to-chip connection to four Gigabit Ethernet MACs using a single serialized link (differential pair) in each direction. The LAN8814 SKU supports high-accuracy timestamping functions to support IEEE-1588 solutions using Microchip Ethernet switches, as well as customer solutions based on SoCs and FPGAs. The LAN8804 SKU has same features as that of LAN8814 SKU except that it does not support 1588, SyncE, or Q-USGMII with PCH/MCH. This adds support for 10BASE-T, 100BASE-TX, and 1000BASE-T, QSGMII link with the MAC. Signed-off-by: Divya Koppera Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 75f880c25bb8..416ee6dd2574 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -27,6 +27,7 @@ #define PHY_ID_KSZ8061 0x00221570 #define PHY_ID_KSZ9031 0x00221620 #define PHY_ID_KSZ9131 0x00221640 +#define PHY_ID_LAN8814 0x00221660 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -- cgit v1.2.3 From dc1129564a0147feb459159fd220ae22357e2eb6 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 11 Sep 2020 21:43:34 -0700 Subject: soc: ti: pruss: Add a platform driver for PRUSS in TI SoCs The Programmable Real-Time Unit - Industrial Communication Subsystem (PRU-ICSS) is present on various TI SoCs such as AM335x or AM437x or the Keystone 66AK2G. Each SoC can have one or more PRUSS instances that may or may not be identical. For example, AM335x SoCs have a single PRUSS, while AM437x has two PRUSS instances PRUSS1 and PRUSS0, with the PRUSS0 being a cut-down version of the PRUSS1. The PRUSS consists of dual 32-bit RISC cores called the Programmable Real-Time Units (PRUs), some shared, data and instruction memories, some internal peripheral modules, and an interrupt controller. The programmable nature of the PRUs provide flexibility to implement custom peripheral interfaces, fast real-time responses, or specialized data handling. The PRU-ICSS functionality is achieved through three different platform drivers addressing a specific portion of the PRUSS. Some sub-modules of the PRU-ICSS IP reuse some of the existing drivers (like davinci mdio driver or the generic syscon driver). This design provides flexibility in representing the different modules of PRUSS accordingly, and at the same time allowing the PRUSS driver to add some instance specific configuration within an SoC. The PRUSS platform driver deals with the overall PRUSS and is used for managing the subsystem level resources like various memories and the CFG module. It is responsible for the creation and deletion of the platform devices for the child PRU devices and other child devices (like Interrupt Controller, MDIO node and some syscon nodes) so that they can be managed by specific platform drivers. The PRUSS interrupt controller is managed by an irqchip driver, while the individual PRU RISC cores are managed by a PRU remoteproc driver. The driver currently supports the AM335x SoC, and support for other TI SoCs will be added in subsequent patches. Signed-off-by: Suman Anna Signed-off-by: Andrew F. Davis Signed-off-by: Tero Kristo Signed-off-by: Grzegorz Jaszczyk Signed-off-by: Santosh Shilimkar --- include/linux/pruss_driver.h | 48 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/linux/pruss_driver.h (limited to 'include/linux') diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h new file mode 100644 index 000000000000..0701fe1953dd --- /dev/null +++ b/include/linux/pruss_driver.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PRU-ICSS sub-system specific definitions + * + * Copyright (C) 2014-2020 Texas Instruments Incorporated - http://www.ti.com/ + * Suman Anna + */ + +#ifndef _PRUSS_DRIVER_H_ +#define _PRUSS_DRIVER_H_ + +#include + +/* + * enum pruss_mem - PRUSS memory range identifiers + */ +enum pruss_mem { + PRUSS_MEM_DRAM0 = 0, + PRUSS_MEM_DRAM1, + PRUSS_MEM_SHRD_RAM2, + PRUSS_MEM_MAX, +}; + +/** + * struct pruss_mem_region - PRUSS memory region structure + * @va: kernel virtual address of the PRUSS memory region + * @pa: physical (bus) address of the PRUSS memory region + * @size: size of the PRUSS memory region + */ +struct pruss_mem_region { + void __iomem *va; + phys_addr_t pa; + size_t size; +}; + +/** + * struct pruss - PRUSS parent structure + * @dev: pruss device pointer + * @cfg_regmap: regmap for config region + * @mem_regions: data for each of the PRUSS memory regions + */ +struct pruss { + struct device *dev; + struct regmap *cfg_regmap; + struct pruss_mem_region mem_regions[PRUSS_MEM_MAX]; +}; + +#endif /* _PRUSS_DRIVER_H_ */ -- cgit v1.2.3 From ba59c9b43c86b2c2396acac94e41d946cbaec9fe Mon Sep 17 00:00:00 2001 From: Grzegorz Jaszczyk Date: Fri, 11 Sep 2020 21:47:10 -0700 Subject: soc: ti: pruss: support CORECLK_MUX and IEPCLK_MUX The IEPCLK_MUX is present on all SoCs whereas the CORECLK_MUX is present only on AM65x SoCs and J721E. Add support for both these CLK muxes. This allows the clock rates and clock parents for these to be controlled through DT leveraging the clk infrastructure for configuring the default parents and rates. Signed-off-by: Roger Quadros Signed-off-by: Suman Anna Signed-off-by: Grzegorz Jaszczyk Signed-off-by: Santosh Shilimkar --- include/linux/pruss_driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h index 0701fe1953dd..ecfded30ed05 100644 --- a/include/linux/pruss_driver.h +++ b/include/linux/pruss_driver.h @@ -36,13 +36,19 @@ struct pruss_mem_region { /** * struct pruss - PRUSS parent structure * @dev: pruss device pointer + * @cfg_base: base iomap for CFG region * @cfg_regmap: regmap for config region * @mem_regions: data for each of the PRUSS memory regions + * @core_clk_mux: clk handle for PRUSS CORE_CLK_MUX + * @iep_clk_mux: clk handle for PRUSS IEP_CLK_MUX */ struct pruss { struct device *dev; + void __iomem *cfg_base; struct regmap *cfg_regmap; struct pruss_mem_region mem_regions[PRUSS_MEM_MAX]; + struct clk *core_clk_mux; + struct clk *iep_clk_mux; }; #endif /* _PRUSS_DRIVER_H_ */ -- cgit v1.2.3 From 2fc7b1561fbe502f7a08f3df7e0d0cecee985966 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 17 Aug 2020 15:01:30 -0700 Subject: drm/msm: Add private interface for adreno-smmu This interface will be used for drm/msm to coordinate with the qcom_adreno_smmu_impl to enable/disable TTBR0 translation. Once TTBR0 translation is enabled, the GPU's CP (Command Processor) will directly switch TTBR0 pgtables (and do the necessary TLB inv) synchronized to the GPU's operation. But help from the SMMU driver is needed to initially bootstrap TTBR0 translation, which cannot be done from the GPU. Since this is a very special case, a private interface is used to avoid adding highly driver specific things to the public iommu interface. Signed-off-by: Rob Clark Reviewed-by: Jordan Crouse Reviewed-by: Bjorn Andersson --- include/linux/adreno-smmu-priv.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/adreno-smmu-priv.h (limited to 'include/linux') diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h new file mode 100644 index 000000000000..a889f28afb42 --- /dev/null +++ b/include/linux/adreno-smmu-priv.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google, Inc + */ + +#ifndef __ADRENO_SMMU_PRIV_H +#define __ADRENO_SMMU_PRIV_H + +#include + +/** + * struct adreno_smmu_priv - private interface between adreno-smmu and GPU + * + * @cookie: An opque token provided by adreno-smmu and passed + * back into the callbacks + * @get_ttbr1_cfg: Get the TTBR1 config for the GPUs context-bank + * @set_ttbr0_cfg: Set the TTBR0 config for the GPUs context bank. A + * NULL config disables TTBR0 translation, otherwise + * TTBR0 translation is enabled with the specified cfg + * + * The GPU driver (drm/msm) and adreno-smmu work together for controlling + * the GPU's SMMU instance. This is by necessity, as the GPU is directly + * updating the SMMU for context switches, while on the other hand we do + * not want to duplicate all of the initial setup logic from arm-smmu. + * + * This private interface is used for the two drivers to coordinate. The + * cookie and callback functions are populated when the GPU driver attaches + * it's domain. + */ +struct adreno_smmu_priv { + const void *cookie; + const struct io_pgtable_cfg *(*get_ttbr1_cfg)(const void *cookie); + int (*set_ttbr0_cfg)(const void *cookie, const struct io_pgtable_cfg *cfg); +}; + +#endif /* __ADRENO_SMMU_PRIV_H */ \ No newline at end of file -- cgit v1.2.3 From dde8ceec9e29fe2abf4fabee7d5a0445d0b75b66 Mon Sep 17 00:00:00 2001 From: Crt Mori Date: Sun, 6 Sep 2020 23:02:31 +0200 Subject: iio: temperature: mlx90632: Interface to change object ambient temperature Since object temperature might be different than the sensor temperature the infrared sensors should provide an interface to inject ambient temperature. This was in past done via write to ambient temperature interface (in_temp_ambient_raw), but I think most people did not know about it. This solution introduces a new iio type of the CALIBAMBIENT which is hopefully more descriptive and more explicit about the purpose and capabilities of the sensors. Signed-off-by: Crt Mori Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200906210231.383976-1-cmo@melexis.com Signed-off-by: Jonathan Cameron --- include/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index e6fd3645963c..1e3ed6f55bca 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -59,6 +59,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_CALIBEMISSIVITY, IIO_CHAN_INFO_OVERSAMPLING_RATIO, IIO_CHAN_INFO_THERMOCOUPLE_TYPE, + IIO_CHAN_INFO_CALIBAMBIENT, }; #endif /* _IIO_TYPES_H_ */ -- cgit v1.2.3 From c5e5ec033c4ab25c53f1fd217849e75deb0bf7bf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 May 2020 10:41:00 +0100 Subject: genirq: Add fasteoi IPI flow For irqchips using the fasteoi flow, IPIs are a bit special. They need to be EOI'd early (before calling the handler), as funny things may happen in the handler (they do not necessarily behave like a normal interrupt). Reviewed-by: Valentin Schneider Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1b7f4dfee35b..57205bbf46bf 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -634,6 +634,7 @@ static inline int irq_set_parent(int irq, int parent_irq) */ extern void handle_level_irq(struct irq_desc *desc); extern void handle_fasteoi_irq(struct irq_desc *desc); +extern void handle_percpu_devid_fasteoi_ipi(struct irq_desc *desc); extern void handle_edge_irq(struct irq_desc *desc); extern void handle_edge_eoi_irq(struct irq_desc *desc); extern void handle_simple_irq(struct irq_desc *desc); -- cgit v1.2.3 From 83cfac95c01817819c2a51f0931d798d851f8a08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 May 2020 14:58:13 +0100 Subject: genirq: Allow interrupts to be excluded from /proc/interrupts A number of architectures implement IPI statistics directly, duplicating the core kstat_irqs accounting. As we move IPIs to being actual IRQs, we would end-up with a confusing display in /proc/interrupts (where the IPIs would appear twice). In order to solve this, allow interrupts to be flagged as "hidden", which excludes them from /proc/interrupts. Reviewed-by: Valentin Schneider Signed-off-by: Marc Zyngier --- include/linux/irq.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 57205bbf46bf..63b9d962ee67 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -71,6 +71,7 @@ enum irqchip_irq_state; * it from the spurious interrupt detection * mechanism and from core side polling. * IRQ_DISABLE_UNLAZY - Disable lazy irq disable + * IRQ_HIDDEN - Don't show up in /proc/interrupts */ enum { IRQ_TYPE_NONE = 0x00000000, @@ -97,13 +98,14 @@ enum { IRQ_PER_CPU_DEVID = (1 << 17), IRQ_IS_POLLED = (1 << 18), IRQ_DISABLE_UNLAZY = (1 << 19), + IRQ_HIDDEN = (1 << 20), }; #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN) #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) -- cgit v1.2.3 From 66d90f6ecee755e9c19a119c9255e80091165498 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 7 Sep 2020 12:09:23 +0100 Subject: firmware: arm_scmi: Enable building as a single module Now, with all the plumbing in place to enable building scmi as a module instead of built-in modules, let us enable the same. Link: https://lore.kernel.org/r/20200907195046.56615-5-sudeep.holla@arm.com Tested-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 4b10093ad671..9cd312a1ff92 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -345,7 +345,7 @@ struct scmi_driver { #define to_scmi_driver(d) container_of(d, struct scmi_driver, driver) -#ifdef CONFIG_ARM_SCMI_PROTOCOL +#if IS_REACHABLE(CONFIG_ARM_SCMI_PROTOCOL) int scmi_driver_register(struct scmi_driver *driver, struct module *owner, const char *mod_name); void scmi_driver_unregister(struct scmi_driver *driver); -- cgit v1.2.3 From 6b6ff4acb310a0351005474673f1e09a90020efd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Sep 2020 10:54:24 +0200 Subject: device: property: add helpers to count items in string arrays Instead of doing the following: count = device_property_read_string_array(dev, propname, NULL, 0); Let's provide inline helpers with hardcoded arguments for counting strings in property arrays. Suggested-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Reviewed-by: Mika Westerberg Reviewed-by: Andy Shevchenko --- include/linux/property.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 9f805c442819..75c178055bc9 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -170,6 +170,12 @@ static inline int device_property_count_u64(struct device *dev, const char *prop return device_property_read_u64_array(dev, propname, NULL, 0); } +static inline int device_property_string_array_count(struct device *dev, + const char *propname) +{ + return device_property_read_string_array(dev, propname, NULL, 0); +} + static inline bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) { @@ -224,6 +230,13 @@ static inline int fwnode_property_count_u64(const struct fwnode_handle *fwnode, return fwnode_property_read_u64_array(fwnode, propname, NULL, 0); } +static inline int +fwnode_property_string_array_count(const struct fwnode_handle *fwnode, + const char *propname) +{ + return fwnode_property_read_string_array(fwnode, propname, NULL, 0); +} + struct software_node; /** -- cgit v1.2.3 From 7cba1a4d5e1628e099728d849918de50dab2e24e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Sep 2020 10:54:25 +0200 Subject: gpiolib: generalize devprop_gpiochip_set_names() for device properties devprop_gpiochip_set_names() is overly complicated with taking the fwnode argument (which requires using dev_fwnode() & of_fwnode_handle() in ACPI and OF GPIO code respectively). Let's just switch to using the generic device properties. This allows us to pull the code setting line names directly into gpiochip_add_data_with_key() instead of handling it separately for ACPI and OF. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg --- include/linux/gpio/driver.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index d1cef5c2715c..56485a040b82 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -756,8 +756,7 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); -void devprop_gpiochip_set_names(struct gpio_chip *gc, - const struct fwnode_handle *fwnode); +int devprop_gpiochip_set_names(struct gpio_chip *gc); #ifdef CONFIG_GPIOLIB -- cgit v1.2.3 From 32fc5aa2df12c7a95dbd1c2c9ee3eb8d7f920d9e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 9 Sep 2020 10:54:26 +0200 Subject: gpiolib: unexport devprop_gpiochip_set_names() Now that devprop_gpiochip_set_names() is only used in a single place inside drivers/gpio/gpiolib.c, there's no need anymore for it to be exported or to even live in its own source file. Pull this function into the core source file for gpiolib. Signed-off-by: Bartosz Golaszewski Reviewed-by: Mika Westerberg Reviewed-by: Andy Shevchenko --- include/linux/gpio/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 56485a040b82..4a7e295c3640 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -756,8 +756,6 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); -int devprop_gpiochip_set_names(struct gpio_chip *gc); - #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ -- cgit v1.2.3 From 6d885330fa3daede6bf24b7422da9d6d9a577eb8 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Mon, 17 Aug 2020 10:27:22 +0200 Subject: media: firmware: qcom_scm: Add memory protect virtual address ranges This adds a new SCM memprotect command to set virtual address ranges. Signed-off-by: Stanimir Varbanov Reviewed-by: Elliot Berman Acked-by: Bjorn Andersson Signed-off-by: Mauro Carvalho Chehab --- include/linux/qcom_scm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 2e1193a3fb5f..0165824c5128 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -84,6 +84,9 @@ extern bool qcom_scm_restore_sec_cfg_available(void); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, + u32 cp_nonpixel_size); extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, unsigned int *src, const struct qcom_scm_vmperm *newvm, @@ -141,6 +144,10 @@ static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } +extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, + u32 cp_nonpixel_size) + { return -ENODEV; } static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, unsigned int *src, const struct qcom_scm_vmperm *newvm, unsigned int dest_cnt) { return -ENODEV; } -- cgit v1.2.3 From 1fa5cef283420b3dad93cd6ab04d7125bc1562de Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 18 Aug 2020 15:07:57 +0800 Subject: i40e: optimise prefetch page refcount refcount of rx_buffer page will be added here originally, so prefetchw is needed, but after commit 1793668c3b8c ("i40e/i40evf: Update code to better handle incrementing page count"), and refcount is not added every time, so change prefetchw as prefetch. Now it mainly services page_address(), but which accesses struct page only when WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL is defined otherwise it returns address based on offset, so we prefetch it conditionally. Jakub suggested to define prefetch_page_address in a common header. Reported-by: kernel test robot Suggested-by: Jakub Kicinski Signed-off-by: Li RongQing Reviewed-by: Jesse Brandeburg Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- include/linux/prefetch.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h index 13eafebf3549..b83a3f944f28 100644 --- a/include/linux/prefetch.h +++ b/include/linux/prefetch.h @@ -15,6 +15,7 @@ #include #include +struct page; /* prefetch(x) attempts to pre-emptively get the memory pointed to by address "x" into the CPU L1 cache. @@ -62,4 +63,11 @@ static inline void prefetch_range(void *addr, size_t len) #endif } +static inline void prefetch_page_address(struct page *page) +{ +#if defined(WANT_PAGE_VIRTUAL) || defined(HASHED_PAGE_VIRTUAL) + prefetch(page); +#endif +} + #endif -- cgit v1.2.3 From 36d818f610f218e9e8711bf498b62a27778dae3f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 11 Sep 2020 20:02:02 +0300 Subject: kernel.h: Move oops_in_progress to printk.h The oops_in_progress is defined in printk.c, so it's logical to move oops_in_progress to printk.h. Signed-off-by: Andy Shevchenko Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200911170202.8565-1-andriy.shevchenko@linux.intel.com --- include/linux/debug_locks.h | 2 +- include/linux/kernel.h | 1 - include/linux/printk.h | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index e7e45f0cc7da..2915f56ad421 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -2,9 +2,9 @@ #ifndef __LINUX_DEBUG_LOCKING_H #define __LINUX_DEBUG_LOCKING_H -#include #include #include +#include struct task_struct; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 82d91547d122..6cef4ca5589c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -527,7 +527,6 @@ extern unsigned int sysctl_oops_all_cpu_backtrace; #endif /* CONFIG_SMP */ extern void bust_spinlocks(int yes); -extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/include/linux/printk.h b/include/linux/printk.h index 34c1a7be3e01..f749a2c2e7a5 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -12,6 +12,8 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; +extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ + #define PRINTK_MAX_SINGLE_HEADER_LEN 2 static inline int printk_get_level(const char *buffer) -- cgit v1.2.3 From cf6501689012ef346cc835a38cbebb1e398d0834 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Tue, 15 Sep 2020 11:46:39 +0100 Subject: smccc: Define vendor hyp owned service call region Vendor specific hypervisor services have their own region of function identifiers reserved by SMCCC. Extend the list of owners to include this case. Signed-off-by: Andrew Scull Signed-off-by: Marc Zyngier Cc: Sudeep Holla Link: https://lore.kernel.org/r/20200915104643.2543892-16-ascull@google.com --- include/linux/arm-smccc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 15c706fb0a37..ee286f5de239 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -49,6 +49,7 @@ #define ARM_SMCCC_OWNER_OEM 3 #define ARM_SMCCC_OWNER_STANDARD 4 #define ARM_SMCCC_OWNER_STANDARD_HYP 5 +#define ARM_SMCCC_OWNER_VENDOR_HYP 6 #define ARM_SMCCC_OWNER_TRUSTED_APP 48 #define ARM_SMCCC_OWNER_TRUSTED_APP_END 49 #define ARM_SMCCC_OWNER_TRUSTED_OS 50 -- cgit v1.2.3 From 0794a974d74dc777a212a3c58dd236f507360348 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Tue, 15 Sep 2020 11:46:40 +0100 Subject: smccc: Use separate variables for args and results Using the same register-bound variable for both arguments and results means these values share a type. That type must allow the arguments to be assigned to it and must also be assignable to the unsigned long fields of struct arm_smccc_res. This restriction on types causes compiler warnings when the argument cannot be implicitly assigned to an unsigned long, for example the pointers that are used in the KVM hyp interface. By separating the arguments and results into their own variables, the type constraint is lifted allowing the arguments to avoid the need for any type conversion. Signed-off-by: Andrew Scull Signed-off-by: Marc Zyngier Cc: Sudeep Holla Link: https://lore.kernel.org/r/20200915104643.2543892-17-ascull@google.com --- include/linux/arm-smccc.h | 73 +++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index ee286f5de239..885c9ffc835c 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -228,87 +228,67 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define __count_args(...) \ ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0) -#define __constraint_write_0 \ - "+r" (r0), "=&r" (r1), "=&r" (r2), "=&r" (r3) -#define __constraint_write_1 \ - "+r" (r0), "+r" (r1), "=&r" (r2), "=&r" (r3) -#define __constraint_write_2 \ - "+r" (r0), "+r" (r1), "+r" (r2), "=&r" (r3) -#define __constraint_write_3 \ - "+r" (r0), "+r" (r1), "+r" (r2), "+r" (r3) -#define __constraint_write_4 __constraint_write_3 -#define __constraint_write_5 __constraint_write_4 -#define __constraint_write_6 __constraint_write_5 -#define __constraint_write_7 __constraint_write_6 - -#define __constraint_read_0 -#define __constraint_read_1 -#define __constraint_read_2 -#define __constraint_read_3 -#define __constraint_read_4 "r" (r4) -#define __constraint_read_5 __constraint_read_4, "r" (r5) -#define __constraint_read_6 __constraint_read_5, "r" (r6) -#define __constraint_read_7 __constraint_read_6, "r" (r7) +#define __constraint_read_0 "r" (arg0) +#define __constraint_read_1 __constraint_read_0, "r" (arg1) +#define __constraint_read_2 __constraint_read_1, "r" (arg2) +#define __constraint_read_3 __constraint_read_2, "r" (arg3) +#define __constraint_read_4 __constraint_read_3, "r" (arg4) +#define __constraint_read_5 __constraint_read_4, "r" (arg5) +#define __constraint_read_6 __constraint_read_5, "r" (arg6) +#define __constraint_read_7 __constraint_read_6, "r" (arg7) #define __declare_arg_0(a0, res) \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1"); \ - register unsigned long r2 asm("r2"); \ - register unsigned long r3 asm("r3") + register unsigned long arg0 asm("r0") = (u32)a0 #define __declare_arg_1(a0, a1, res) \ typeof(a1) __a1 = a1; \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = __a1; \ - register unsigned long r2 asm("r2"); \ - register unsigned long r3 asm("r3") + register unsigned long arg0 asm("r0") = (u32)a0; \ + register typeof(a1) arg1 asm("r1") = __a1 #define __declare_arg_2(a0, a1, a2, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = __a1; \ - register unsigned long r2 asm("r2") = __a2; \ - register unsigned long r3 asm("r3") + register unsigned long arg0 asm("r0") = (u32)a0; \ + register typeof(a1) arg1 asm("r1") = __a1; \ + register typeof(a2) arg2 asm("r2") = __a2 #define __declare_arg_3(a0, a1, a2, a3, res) \ typeof(a1) __a1 = a1; \ typeof(a2) __a2 = a2; \ typeof(a3) __a3 = a3; \ struct arm_smccc_res *___res = res; \ - register unsigned long r0 asm("r0") = (u32)a0; \ - register unsigned long r1 asm("r1") = __a1; \ - register unsigned long r2 asm("r2") = __a2; \ - register unsigned long r3 asm("r3") = __a3 + register unsigned long arg0 asm("r0") = (u32)a0; \ + register typeof(a1) arg1 asm("r1") = __a1; \ + register typeof(a2) arg2 asm("r2") = __a2; \ + register typeof(a3) arg3 asm("r3") = __a3 #define __declare_arg_4(a0, a1, a2, a3, a4, res) \ typeof(a4) __a4 = a4; \ __declare_arg_3(a0, a1, a2, a3, res); \ - register unsigned long r4 asm("r4") = __a4 + register typeof(a4) arg4 asm("r4") = __a4 #define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ typeof(a5) __a5 = a5; \ __declare_arg_4(a0, a1, a2, a3, a4, res); \ - register unsigned long r5 asm("r5") = __a5 + register typeof(a5) arg5 asm("r5") = __a5 #define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ typeof(a6) __a6 = a6; \ __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ - register unsigned long r6 asm("r6") = __a6 + register typeof(a6) arg6 asm("r6") = __a6 #define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ typeof(a7) __a7 = a7; \ __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ - register unsigned long r7 asm("r7") = __a7 + register typeof(a7) arg7 asm("r7") = __a7 #define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) #define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) #define ___constraints(count) \ - : __constraint_write_ ## count \ : __constraint_read_ ## count \ : "memory" #define __constraints(count) ___constraints(count) @@ -320,8 +300,13 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, */ #define __arm_smccc_1_1(inst, ...) \ do { \ + register unsigned long r0 asm("r0"); \ + register unsigned long r1 asm("r1"); \ + register unsigned long r2 asm("r2"); \ + register unsigned long r3 asm("r3"); \ __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ - asm volatile(inst "\n" \ + asm volatile(inst "\n" : \ + "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \ __constraints(__count_args(__VA_ARGS__))); \ if (___res) \ *___res = (typeof(*___res)){r0, r1, r2, r3}; \ @@ -367,7 +352,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define __fail_smccc_1_1(...) \ do { \ __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ - asm ("" __constraints(__count_args(__VA_ARGS__))); \ + asm ("" : __constraints(__count_args(__VA_ARGS__))); \ if (___res) \ ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ } while (0) -- cgit v1.2.3 From fb609b5112bd74b4ba93c86d7af4089ffd9432c2 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 13 May 2020 11:06:47 +0300 Subject: net/mlx5: Always use container_of to find mdev pointer from clock struct Clock struct is part of struct mlx5_core_dev. Code was inconsistent, on some cases used container_of and on another used clock->mdev. Align code to use container_of amd remove clock->mdev pointer. While here, fix reverse xmas tree coding style. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c145de0473bc..8dc3da6e6480 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -643,7 +643,6 @@ struct mlx5_pps { }; struct mlx5_clock { - struct mlx5_core_dev *mdev; struct mlx5_nb pps_nb; seqlock_t lock; struct cyclecounter cycles; -- cgit v1.2.3 From b7cf0806e8783e38f881cae3c56f0869e70b8da2 Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Sun, 17 May 2020 10:16:49 +0300 Subject: net/mlx5e: Add CQE compression support for multi-strides packets Add CQE compression support for completions of packets that span multiple strides in a Striding RQ, per the HW capability. In our memory model, we use small strides (256B as of today) for the non-linear SKB mode. This feature allows CQE compression to work also for multiple strides packets. In this case decompressing the mini CQE array will use stride index provided by HW as part of the mini CQE. Before this feature, compression was possible only for single-strided packets, i.e. for packets of size up to 256 bytes when in non-linear mode, and the index was maintained by SW. This feature is supported for ConnectX-5 and above. Feature performance test: This was whitebox-tested, we reduced the PCI speed from 125Gb/s to 62.5Gb/s to overload pci and manipulated mlx5 driver to drop incoming packets before building the SKB to achieve low cpu utilization. Outcome is low cpu utilization and bottleneck on pci only. Test setup: Server: Intel(R) Xeon(R) Silver 4108 CPU @ 1.80GHz server, 32 cores NIC: ConnectX-6 DX. Sender side generates 300 byte packets at full pci bandwidth. Receiver side configuration: Single channel, one cpu processing with one ring allocated. Cpu utilization is ~20% while pci bandwidth is fully utilized. For the generated traffic and interface MTU of 4500B (to activate the non-linear SKB mode), packet rate improvement is about 19% from ~17.6Mpps to ~21Mpps. Without this feature, counters show no CQE compression blocks for this setup, while with the feature, counters show ~20.7Mpps compressed CQEs in ~500K compression blocks. Signed-off-by: Ofer Levi Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d3376e20f5e..81ca5989009b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -816,7 +816,7 @@ struct mlx5_mini_cqe8 { __be32 rx_hash_result; struct { __be16 checksum; - __be16 rsvd; + __be16 stridx; }; struct { __be16 wqe_counter; @@ -836,6 +836,7 @@ enum { enum { MLX5_CQE_FORMAT_CSUM = 0x1, + MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3, }; #define MLX5_MINI_CQE_ARRAY_SIZE 8 -- cgit v1.2.3 From 9a27a33027f22a716ce362be48d70ae0eb012ab7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Sep 2020 17:11:52 -0700 Subject: ethtool: add standard pause stats Currently drivers have to report their pause frames statistics via ethtool -S, and there is a wide variety of names used for these statistics. Add the two statistics defined in IEEE 802.3x to the standard API. Create a new ethtool request header flag for including statistics in the response to GET commands. Always create the ETHTOOL_A_PAUSE_STATS nest in replies when flag is set. Testing if driver declares the op is not a reliable way of checking if any stats will actually be included and therefore we don't want to give the impression that presence of ETHTOOL_A_PAUSE_STATS indicates driver support. Note that this patch does not include PFC counters, which may fit better in dcbnl? But mostly I don't need them/have a setup to test them so I haven't looked deeply into exposing them :) v3: - add a helper for "uninitializing" stats, rather than a cryptic memset() (Andrew) Signed-off-by: Jakub Kicinski Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/ethtool.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 969a80211df6..060b20f0b20f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -241,6 +241,27 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, ETHTOOL_COALESCE_PKT_RATE_LOW | ETHTOOL_COALESCE_PKT_RATE_HIGH | \ ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL) +#define ETHTOOL_STAT_NOT_SET (~0ULL) + +/** + * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames + * @tx_pause_frames: transmitted pause frame count. Reported to user space + * as %ETHTOOL_A_PAUSE_STAT_TX_FRAMES. + * + * Equivalent to `30.3.4.2 aPAUSEMACCtrlFramesTransmitted` + * from the standard. + * + * @rx_pause_frames: received pause frame count. Reported to user space + * as %ETHTOOL_A_PAUSE_STAT_RX_FRAMES. Equivalent to: + * + * Equivalent to `30.3.4.3 aPAUSEMACCtrlFramesReceived` + * from the standard. + */ +struct ethtool_pause_stats { + u64 tx_pause_frames; + u64 rx_pause_frames; +}; + /** * struct ethtool_ops - optional netdev operations * @supported_coalesce_params: supported types of interrupt coalescing. @@ -282,6 +303,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * Returns a negative error code or zero. * @get_ringparam: Report ring sizes * @set_ringparam: Set ring sizes. Returns a negative error code or zero. + * @get_pause_stats: Report pause frame statistics. Drivers must not zero + * statistics which they don't report. The stats structure is initialized + * to ETHTOOL_STAT_NOT_SET indicating driver does not report statistics. * @get_pauseparam: Report pause parameters * @set_pauseparam: Set pause parameters. Returns a negative error code * or zero. @@ -418,6 +442,8 @@ struct ethtool_ops { struct ethtool_ringparam *); int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *); + void (*get_pause_stats)(struct net_device *dev, + struct ethtool_pause_stats *pause_stats); void (*get_pauseparam)(struct net_device *, struct ethtool_pauseparam*); int (*set_pauseparam)(struct net_device *, -- cgit v1.2.3 From 984fe94f94756dacb3c8cc52904a23adf9e04da1 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 15 Sep 2020 16:45:39 -0700 Subject: bpf: Mutex protect used_maps array and count To support modifying the used_maps array, we use a mutex to protect the use of the counter and the array. The mutex is initialized right after the prog aux is allocated, and destroyed right before prog aux is freed. This way we guarantee it's initialized for both cBPF and eBPF. Signed-off-by: YiFei Zhu Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: YiFei Zhu Link: https://lore.kernel.org/bpf/20200915234543.3220146-2-sdf@google.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c6d9f2c444f4..5dcce0364634 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -751,6 +751,7 @@ struct bpf_prog_aux { struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; + struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ -- cgit v1.2.3 From fcc2cc1f35613c016e1de25bb001bfdd9eaa25f9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Sep 2020 17:37:46 +0200 Subject: USB: move snd_usb_pipe_sanity_check into the USB core snd_usb_pipe_sanity_check() is a great function, so let's move it into the USB core so that other parts of the kernel, including the USB core, can call it. Name it usb_pipe_type_check() to match the existing usb_urb_ep_type_check() call, which now uses this function. Cc: Jaroslav Kysela Cc: "Gustavo A. R. Silva" Cc: Eli Billauer Cc: Emiliano Ingrassia Cc: Alan Stern Cc: Alexander Tsoy Cc: "Geoffrey D. Bennett" Cc: Jussi Laako Cc: Nick Kossifidis Cc: Dmitry Panchenko Cc: Chris Wulff Cc: Jesus Ramos Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20200914153756.3412156-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 20c555db4621..0b3963d7ec38 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1764,6 +1764,7 @@ static inline int usb_urb_dir_out(struct urb *urb) return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_OUT; } +int usb_pipe_type_check(struct usb_device *dev, unsigned int pipe); int usb_urb_ep_type_check(const struct urb *urb); void *usb_alloc_coherent(struct usb_device *dev, size_t size, -- cgit v1.2.3 From 719b8f2850d3d9b863cc5e4f08e9ef0206e45b26 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Sep 2020 17:37:47 +0200 Subject: USB: add usb_control_msg_send() and usb_control_msg_recv() New core functions to make sending/receiving USB control messages easier and saner. In discussions, it turns out that the large majority of users of usb_control_msg() do so in potentially incorrect ways. The most common issue is where a "short" message is received, yet never detected properly due to "incorrect" error handling. Handle all of this in the USB core with two new functions to try to make working with USB control messages simpler. No more need for dynamic data, messages can be on the stack, and only "complete" send/receive will work without causing an error. Link: https://lore.kernel.org/r/20200914153756.3412156-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 0b3963d7ec38..a5460f08126e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1802,6 +1802,12 @@ extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, int timeout); /* wrappers around usb_control_msg() for the most common standard requests */ +int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request, + __u8 requesttype, __u16 value, __u16 index, + const void *data, __u16 size, int timeout); +int usb_control_msg_recv(struct usb_device *dev, __u8 endpoint, __u8 request, + __u8 requesttype, __u16 value, __u16 index, + void *data, __u16 size, int timeout); extern int usb_get_descriptor(struct usb_device *dev, unsigned char desctype, unsigned char descindex, void *buf, int size); extern int usb_get_status(struct usb_device *dev, -- cgit v1.2.3 From 3babbe447d76ac2919ec4d0eb3b0adfb22f5b03c Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 7 Aug 2020 13:15:16 +0530 Subject: sched/topology: Allow archs to override cpu_smt_mask cpu_smt_mask tracks topology_sibling_cpumask. This would be good for most architectures. One of the users of cpu_smt_mask(), would be to identify idle-cores. On Power9, a pair of SMT4 cores can be presented by the firmware as a SMT8 core for backward compatibility reasons. powerpc allows LPARs to be live migrated from Power8 to Power9. Do note Power8 had only SMT8 cores. Existing software which has been developed/configured for Power8 would expect to see SMT8 core. Maintaining the illusion of SMT8 core is a requirement to make that work. In order to maintain above userspace backward compatibility with previous versions of processor, Power9 onwards there is option to the firmware to advertise a pair of SMT4 cores as a fused cores aka SMT8 core. On Power9 this pair shares the L2 cache as well. However, from the scheduler's point of view, a core should be determined by SMT4, since its a completely independent unit of compute. Hence allow powerpc architecture to override the default cpu_smt_mask() to point to the SMT4 cores in a SMT8 mode. This will ensure the scheduler is always given the right information. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200807074517.27957-1-srikar@linux.vnet.ibm.com --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 608fa4aadf0e..ad03df1cc266 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -198,7 +198,7 @@ static inline int cpu_to_mem(int cpu) #define topology_die_cpumask(cpu) cpumask_of(cpu) #endif -#ifdef CONFIG_SCHED_SMT +#if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) static inline const struct cpumask *cpu_smt_mask(int cpu) { return topology_sibling_cpumask(cpu); -- cgit v1.2.3 From a25536e8d57dd6fdd70b07219fc2c38e3b8c6503 Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Fri, 11 Sep 2020 08:18:33 +0200 Subject: phy: Add new PHY attribute max_link_rate Add new PHY attribute max_link_rate to struct phy_attrs. This indicates maximum link rate supported by PHY (in Mbps). Signed-off-by: Yuti Amonkar Signed-off-by: Swapnil Jakhade Reviewed-by: Laurent Pinchart Acked-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/1599805114-22063-2-git-send-email-sjakhade@cadence.com Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index bcee8eba62b3..e435bdb0bab3 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -115,10 +115,12 @@ struct phy_ops { /** * struct phy_attrs - represents phy attributes * @bus_width: Data path width implemented by PHY + * @max_link_rate: Maximum link rate supported by PHY (in Mbps) * @mode: PHY mode */ struct phy_attrs { u32 bus_width; + u32 max_link_rate; enum phy_mode mode; }; -- cgit v1.2.3 From 42d8a346c5c06689f4f25aecfa287a5aca501a55 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 16 Sep 2020 14:21:29 +0800 Subject: ipmi: add retry in try_get_dev_id() Use a retry machanism to give the BMC more opportunities to correctly respond when we receive specific completion codes. This is similar to what is done in __get_device_id(). Signed-off-by: Xianting Tian Message-Id: <20200916062129.26129-1-tian.xianting@h3c.com> [Moved GET_DEVICE_ID_MAX_RETRY to include/linux/ipmi.h, reworded some text.] Signed-off-by: Corey Minyard --- include/linux/ipmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index ef61676cfe05..52850a02a3d0 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -333,4 +333,6 @@ struct ipmi_smi_info { /* This is to get the private info of struct ipmi_smi */ extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data); +#define GET_DEVICE_ID_MAX_RETRY 5 + #endif /* __LINUX_IPMI_H */ -- cgit v1.2.3 From 267580db047ef428a70bef8287ca62c5a450c139 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Tue, 15 Sep 2020 16:30:28 +0200 Subject: seqlock: Unbreak lockdep seqcount_LOCKNAME_init() needs to be a macro due to the lockdep annotation in seqcount_init(). Since a macro cannot define another macro, we need to effectively revert commit: e4e9ab3f9f91 ("seqlock: Fold seqcount_LOCKNAME_init() definition"). Fixes: e4e9ab3f9f91 ("seqlock: Fold seqcount_LOCKNAME_init() definition") Reported-by: Qian Cai Debugged-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Tested-by: Qian Cai Link: https://lkml.kernel.org/r/20200915143028.GB2674@hirez.programming.kicks-ass.net --- include/linux/seqlock.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f73c7eb68f27..76e44e6c0100 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -173,6 +173,19 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * @lock: Pointer to the associated lock */ +#define seqcount_LOCKNAME_init(s, _lock, lockname) \ + do { \ + seqcount_##lockname##_t *____s = (s); \ + seqcount_init(&____s->seqcount); \ + __SEQ_LOCK(____s->lock = (_lock)); \ + } while (0) + +#define seqcount_raw_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, raw_spinlock) +#define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) +#define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock); +#define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex); +#define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex); + /* * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t @@ -190,13 +203,6 @@ typedef struct seqcount_##lockname { \ __SEQ_LOCK(locktype *lock); \ } seqcount_##lockname##_t; \ \ -static __always_inline void \ -seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock) \ -{ \ - seqcount_init(&s->seqcount); \ - __SEQ_LOCK(s->lock = lock); \ -} \ - \ static __always_inline seqcount_t * \ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ @@ -284,8 +290,8 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu __SEQ_LOCK(.lock = (assoc_lock)) \ } -#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) +#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -- cgit v1.2.3 From dfb9eb7cf6cd0c0b0f2a1111fcc47b0a297b097d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 13:16:45 +0200 Subject: PCI/MSI: Rework pci_msi_domain_calc_hwirq() Retrieve the PCI device from the msi descriptor instead of doing so at the call sites. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20200826112332.352583299@linutronix.de --- include/linux/msi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 8ad679e9d9c0..d360cc7c4f94 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -369,8 +369,7 @@ void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, - struct msi_desc *desc); +irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc); int pci_msi_domain_check_cap(struct irq_domain *domain, struct msi_domain_info *info, struct device *dev); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); -- cgit v1.2.3 From 9006c133a422f474d7d8e10a8baae179f70c22f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 13:16:47 +0200 Subject: x86/msi: Use generic MSI domain ops pci_msi_get_hwirq() and pci_msi_set_desc are not longer special. Enable the generic MSI domain ops in the core and PCI MSI code unconditionally and get rid of the x86 specific implementations in the X86 MSI code and in the hyperv PCI driver. Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20200826112332.564274859@linutronix.de --- include/linux/msi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index d360cc7c4f94..5aa126b7820b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -369,7 +369,6 @@ void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc); int pci_msi_domain_check_cap(struct irq_domain *domain, struct msi_domain_info *info, struct device *dev); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); -- cgit v1.2.3 From c6c9e2838c5f0b94773511586123bcb125757f2a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 13:16:51 +0200 Subject: irqdomain/msi: Provide DOMAIN_BUS_VMD_MSI PCI devices behind a VMD bus are not subject to interrupt remapping, but the irq domain for VMD MSI cannot be distinguished from a regular PCI/MSI irq domain. Add a new domain bus token and allow it in the bus token check in msi_check_reservation_mode() to keep the functionality the same once VMD uses this token. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Acked-by: Jon Derrick Link: https://lore.kernel.org/r/20200826112332.954409970@linutronix.de --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b37350c4fe37..44445d9de881 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -84,6 +84,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_FSL_MC_MSI, DOMAIN_BUS_TI_SCI_INTA_MSI, DOMAIN_BUS_WAKEUP, + DOMAIN_BUS_VMD_MSI, }; /** -- cgit v1.2.3 From 2fd602669ee6d749a7dc47b84b87cef1a5075999 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 13:16:53 +0200 Subject: PCI/MSI: Provide pci_dev_has_special_msi_domain() helper Provide a helper function to check whether a PCI device is handled by a non-standard PCI/MSI domain. This will be used to exclude such devices which hang of a special bus, e.g. VMD, to be excluded from the irq domain override in irq remapping. Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20200826112333.139387358@linutronix.de --- include/linux/msi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 5aa126b7820b..a65cc47d0610 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -373,6 +373,7 @@ int pci_msi_domain_check_cap(struct irq_domain *domain, struct msi_domain_info *info, struct device *dev); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); +bool pci_dev_has_special_msi_domain(struct pci_dev *pdev); #else static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { -- cgit v1.2.3 From 43e9e705dd57c466c4bfe32ab8c17db537b89297 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 13:16:57 +0200 Subject: irqdomain/msi: Allow to override msi_domain_alloc/free_irqs() To support MSI irq domains which do not fit at all into the regular MSI irqdomain scheme, like the XEN MSI interrupt management for PV/HVM/DOM0, it's necessary to allow to override the alloc/free implementation. This is a preperatory step to switch X86 away from arch_*_msi_irqs() and store the irq domain pointer right in struct device. No functional change for existing MSI irq domain users. Aside of the evil XEN wrapper this is also useful for special MSI domains which need to do extra alloc/free work before/after calling the generic core function. Work like allocating/freeing MSI descriptors, MSI storage space etc. Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20200826112333.526797548@linutronix.de --- include/linux/msi.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index a65cc47d0610..0180534b5428 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -241,6 +241,10 @@ struct msi_domain_info; * @msi_finish: Optional callback to finalize the allocation * @set_desc: Set the msi descriptor for an interrupt * @handle_error: Optional error handler if the allocation fails + * @domain_alloc_irqs: Optional function to override the default allocation + * function. + * @domain_free_irqs: Optional function to override the default free + * function. * * @get_hwirq, @msi_init and @msi_free are callbacks used by * msi_create_irq_domain() and related interfaces @@ -248,6 +252,22 @@ struct msi_domain_info; * @msi_check, @msi_prepare, @msi_finish, @set_desc and @handle_error * are callbacks used by msi_domain_alloc_irqs() and related * interfaces which are based on msi_desc. + * + * @domain_alloc_irqs, @domain_free_irqs can be used to override the + * default allocation/free functions (__msi_domain_alloc/free_irqs). This + * is initially for a wrapper around XENs seperate MSI universe which can't + * be wrapped into the regular irq domains concepts by mere mortals. This + * allows to universally use msi_domain_alloc/free_irqs without having to + * special case XEN all over the place. + * + * Contrary to other operations @domain_alloc_irqs and @domain_free_irqs + * are set to the default implementation if NULL and even when + * MSI_FLAG_USE_DEF_DOM_OPS is not set to avoid breaking existing users and + * because these callbacks are obviously mandatory. + * + * This is NOT meant to be abused, but it can be useful to build wrappers + * for specialized MSI irq domains which need extra work before and after + * calling __msi_domain_alloc_irqs()/__msi_domain_free_irqs(). */ struct msi_domain_ops { irq_hw_number_t (*get_hwirq)(struct msi_domain_info *info, @@ -270,6 +290,10 @@ struct msi_domain_ops { struct msi_desc *desc); int (*handle_error)(struct irq_domain *domain, struct msi_desc *desc, int error); + int (*domain_alloc_irqs)(struct irq_domain *domain, + struct device *dev, int nvec); + void (*domain_free_irqs)(struct irq_domain *domain, + struct device *dev); }; /** @@ -327,8 +351,11 @@ int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); +int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, + int nvec); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); +void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); -- cgit v1.2.3 From 85a8dfc57a0b96785881735e09a61a0fde911ca4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 13:16:59 +0200 Subject: iommm/vt-d: Store irq domain in struct device As a first step to make X86 utilize the direct MSI irq domain operations store the irq domain pointer in the device struct when a device is probed. This is done from dmar_pci_bus_add_dev() because it has to work even when DMA remapping is disabled. It only overrides the irqdomain of devices which are handled by a regular PCI/MSI irq domain which protects PCI devices behind special busses like VMD which have their own irq domain. No functional change. It just avoids the redirection through arch_*_msi_irqs() and allows the PCI/MSI core to directly invoke the irq domain alloc/free functions instead of having to look up the irq domain for every single MSI interupt. Signed-off-by: Thomas Gleixner Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20200826112333.714566121@linutronix.de --- include/linux/intel-iommu.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b1ed2f25f7c0..46f5aaaa66ff 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -425,6 +425,8 @@ struct q_inval { int free_cnt; }; +struct dmar_pci_notify_info; + #ifdef CONFIG_IRQ_REMAP /* 1MB - maximum possible interrupt remapping table size */ #define INTR_REMAP_PAGE_ORDER 8 @@ -439,6 +441,11 @@ struct ir_table { struct irte *base; unsigned long *bitmap; }; + +void intel_irq_remap_add_device(struct dmar_pci_notify_info *info); +#else +static inline void +intel_irq_remap_add_device(struct dmar_pci_notify_info *info) { } #endif struct iommu_flush { -- cgit v1.2.3 From 077ee78e392869e46ae6bdc6ba2a3c4249d0b5e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Aug 2020 13:17:02 +0200 Subject: PCI/MSI: Make arch_.*_msi_irq[s] fallbacks selectable The arch_.*_msi_irq[s] fallbacks are compiled in whether an architecture requires them or not. Architectures which are fully utilizing hierarchical irq domains should never call into that code. It's not only architectures which depend on that by implementing one or more of the weak functions, there is also a bunch of drivers which relies on the weak functions which invoke msi_controller::setup_irq[s] and msi_controller::teardown_irq. Make the architectures and drivers which rely on them select them in Kconfig and if not selected replace them by stub functions which emit a warning and fail the PCI/MSI interrupt allocation. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200826112333.992429909@linutronix.de --- include/linux/msi.h | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 0180534b5428..6b584cc4757c 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -193,17 +193,38 @@ void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); /* - * The arch hooks to setup up msi irqs. Those functions are - * implemented as weak symbols so that they /can/ be overriden by - * architecture specific code if needed. + * The arch hooks to setup up msi irqs. Default functions are implemented + * as weak symbols so that they /can/ be overriden by architecture specific + * code if needed. These hooks must be enabled by the architecture or by + * drivers which depend on them via msi_controller based MSI handling. + * + * If CONFIG_PCI_MSI_ARCH_FALLBACKS is not selected they are replaced by + * stubs with warnings. */ +#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); void arch_teardown_msi_irq(unsigned int irq); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void arch_teardown_msi_irqs(struct pci_dev *dev); -void arch_restore_msi_irqs(struct pci_dev *dev); - void default_teardown_msi_irqs(struct pci_dev *dev); +#else +static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + WARN_ON_ONCE(1); + return -ENODEV; +} + +static inline void arch_teardown_msi_irqs(struct pci_dev *dev) +{ + WARN_ON_ONCE(1); +} +#endif + +/* + * The restore hooks are still available as they are useful even + * for fully irq domain based setups. Courtesy to XEN/X86. + */ +void arch_restore_msi_irqs(struct pci_dev *dev); void default_restore_msi_irqs(struct pci_dev *dev); struct msi_controller { -- cgit v1.2.3 From 58c909022a5a56cd1d9e89c8c5461fd1f6a27bb5 Mon Sep 17 00:00:00 2001 From: Lenny Szubowicz Date: Fri, 4 Sep 2020 21:31:05 -0400 Subject: efi: Support for MOK variable config table Because of system-specific EFI firmware limitations, EFI volatile variables may not be capable of holding the required contents of the Machine Owner Key (MOK) certificate store when the certificate list grows above some size. Therefore, an EFI boot loader may pass the MOK certs via a EFI configuration table created specifically for this purpose to avoid this firmware limitation. An EFI configuration table is a much more primitive mechanism compared to EFI variables and is well suited for one-way passage of static information from a pre-OS environment to the kernel. This patch adds initial kernel support to recognize, parse, and validate the EFI MOK configuration table, where named entries contain the same data that would otherwise be provided in similarly named EFI variables. Additionally, this patch creates a sysfs binary file for each EFI MOK configuration table entry found. These files are read-only to root and are provided for use by user space utilities such as mokutil. A subsequent patch will load MOK certs into the trusted platform key ring using this infrastructure. Signed-off-by: Lenny Szubowicz Link: https://lore.kernel.org/r/20200905013107.10457-2-lszubowi@redhat.com Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 73db1ae04cef..4a2332f146eb 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -357,6 +357,7 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) +#define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89) /* OEM GUIDs */ #define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55) @@ -546,6 +547,7 @@ extern struct efi { unsigned long esrt; /* ESRT table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ + unsigned long mokvar_table; /* MOK variable config table */ efi_get_time_t *get_time; efi_set_time_t *set_time; @@ -1252,4 +1254,36 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); char *efi_systab_show_arch(char *str); +/* + * The LINUX_EFI_MOK_VARIABLE_TABLE_GUID config table can be provided + * to the kernel by an EFI boot loader. The table contains a packed + * sequence of these entries, one for each named MOK variable. + * The sequence is terminated by an entry with a completely NULL + * name and 0 data size. + */ +struct efi_mokvar_table_entry { + char name[256]; + u64 data_size; + u8 data[]; +} __attribute((packed)); + +#ifdef CONFIG_LOAD_UEFI_KEYS +extern void __init efi_mokvar_table_init(void); +extern struct efi_mokvar_table_entry *efi_mokvar_entry_next( + struct efi_mokvar_table_entry **mokvar_entry); +extern struct efi_mokvar_table_entry *efi_mokvar_entry_find(const char *name); +#else +static inline void efi_mokvar_table_init(void) { } +static inline struct efi_mokvar_table_entry *efi_mokvar_entry_next( + struct efi_mokvar_table_entry **mokvar_entry) +{ + return NULL; +} +static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( + const char *name) +{ + return NULL; +} +#endif + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From d8f0cd76684e679d4a8ec4929fcdf6c3a030a007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Tue, 15 Sep 2020 14:02:58 +0200 Subject: iio: adis: Drop non Managed device functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop `adis_setup_buffer_and_trigger()`. All users were updated to use the devm version of this function. This avoids having almost the same code repeated. Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20200915120258.161587-11-nuno.sa@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 2df67448f0d1..01ba691da2f3 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -517,14 +517,8 @@ struct adis_burst { int devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, irq_handler_t trigger_handler); -int adis_setup_buffer_and_trigger(struct adis *adis, - struct iio_dev *indio_dev, irqreturn_t (*trigger_handler)(int, void *)); -void adis_cleanup_buffer_and_trigger(struct adis *adis, - struct iio_dev *indio_dev); int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev); -int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev); -void adis_remove_trigger(struct adis *adis); int adis_update_scan_mode(struct iio_dev *indio_dev, const unsigned long *scan_mask); @@ -538,33 +532,12 @@ devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, return 0; } -static inline int adis_setup_buffer_and_trigger(struct adis *adis, - struct iio_dev *indio_dev, irqreturn_t (*trigger_handler)(int, void *)) -{ - return 0; -} - -static inline void adis_cleanup_buffer_and_trigger(struct adis *adis, - struct iio_dev *indio_dev) -{ -} - static inline int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) { return 0; } -static inline int adis_probe_trigger(struct adis *adis, - struct iio_dev *indio_dev) -{ - return 0; -} - -static inline void adis_remove_trigger(struct adis *adis) -{ -} - #define adis_update_scan_mode NULL #endif /* CONFIG_IIO_BUFFER */ -- cgit v1.2.3 From aabf59432c51be174994ecfe280f75ac139b5550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Sun, 6 Sep 2020 22:39:49 +0000 Subject: fs: Remove duplicated flag O_NDELAY occurring twice in VALID_OPEN_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The O_NDELAY flag occurs twice in the VALID_OPEN_FLAGS definition, this change removes the duplicate. There is no change to the functionality. Note, that the flags O_NONBLOCK and O_NDELAY are not duplicates, as values of these flags are platform dependent, and on platforms like Sparc O_NONBLOCK and O_NDELAY are not the same. This has been done that way to maintain the ABI compatibility with Solaris since the Sparc port was first introduced. This change resolves the following Coccinelle warning: include/linux/fcntl.h:11:13-21: duplicated argument to & or | Signed-off-by: Krzysztof Wilczyński Signed-off-by: Al Viro --- include/linux/fcntl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 7bcdcf4f6ab2..921e750843e6 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -8,7 +8,7 @@ /* List of all valid flags for the open/openat flags argument: */ #define VALID_OPEN_FLAGS \ (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \ - O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \ + O_APPEND | O_NDELAY | O_NONBLOCK | __O_SYNC | O_DSYNC | \ FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) -- cgit v1.2.3 From ba3a86e47232ad9f76160929f33ac9c64e4d0567 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Sep 2020 15:44:37 -0700 Subject: rcu-tasks: Fix grace-period/unlock race in RCU Tasks Trace The more intense grace-period processing resulting from the 50x RCU Tasks Trace grace-period speedups exposed the following race condition: o Task A running on CPU 0 executes rcu_read_lock_trace(), entering a read-side critical section. o When Task A eventually invokes rcu_read_unlock_trace() to exit its read-side critical section, this function notes that the ->trc_reader_special.s flag is zero and and therefore invoke wil set ->trc_reader_nesting to zero using WRITE_ONCE(). But before that happens... o The RCU Tasks Trace grace-period kthread running on some other CPU interrogates Task A, but this fails because this task is currently running. This kthread therefore sends an IPI to CPU 0. o CPU 0 receives the IPI, and thus invokes trc_read_check_handler(). Because Task A has not yet cleared its ->trc_reader_nesting counter, this function sees that Task A is still within its read-side critical section. This function therefore sets the ->trc_reader_nesting.b.need_qs flag, AKA the .need_qs flag. Except that Task A has already checked the .need_qs flag, which is part of the ->trc_reader_special.s flag. The .need_qs flag therefore remains set until Task A's next rcu_read_unlock_trace(). o Task A now invokes synchronize_rcu_tasks_trace(), which cannot start a new grace period until the current grace period completes. And thus cannot return until after that time. But Task A's .need_qs flag is still set, which prevents the current grace period from completing. And because Task A is blocked, it will never execute rcu_read_unlock_trace() until its call to synchronize_rcu_tasks_trace() returns. We are therefore deadlocked. This race is improbable, but 80 hours of rcutorture made it happen twice. The race was possible before the grace-period speedup, but roughly 50x less probable. Several thousand hours of rcutorture would have been necessary to have a reasonable chance of making this happen before this 50x speedup. This commit therefore eliminates this deadlock by setting ->trc_reader_nesting to a large negative number before checking the .need_qs and zeroing (or decrementing with respect to its initial value) ->trc_reader_nesting. For its part, the IPI handler's trc_read_check_handler() function adds a check for negative values, deferring evaluation of the task in this case. Taken together, these changes avoid this deadlock scenario. Fixes: 276c410448db ("rcu-tasks: Split ->trc_reader_need_end") Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Cc: # 5.7.x Signed-off-by: Paul E. McKenney --- include/linux/rcupdate_trace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index d9015aac78c6..a6a6a3acab5a 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -50,6 +50,7 @@ static inline void rcu_read_lock_trace(void) struct task_struct *t = current; WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1); + barrier(); if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && t->trc_reader_special.b.need_mb) smp_mb(); // Pairs with update-side barriers @@ -72,6 +73,9 @@ static inline void rcu_read_unlock_trace(void) rcu_lock_release(&rcu_trace_lock_map); nesting = READ_ONCE(t->trc_reader_nesting) - 1; + barrier(); // Critical section before disabling. + // Disable IPI-based setting of .need_qs. + WRITE_ONCE(t->trc_reader_nesting, INT_MIN); if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) { WRITE_ONCE(t->trc_reader_nesting, nesting); return; // We assume shallow reader nesting. -- cgit v1.2.3 From 9baf68cc4544056f33797b78ec09388f54ecc8f0 Mon Sep 17 00:00:00 2001 From: Alex Kluver Date: Wed, 19 Aug 2020 09:35:43 -0500 Subject: edac,ghes,cper: Add Row Extension to Memory Error Record Memory errors could be printed with incorrect row values since the DIMM size has outgrown the 16 bit row field in the CPER structure. UEFI Specification Version 2.8 has increased the size of row by allowing it to use the first 2 bits from a previously reserved space within the structure. When needed, add the extension bits to the row value printed. Based on UEFI 2.8 Table 299. Memory Error Record Signed-off-by: Alex Kluver Tested-by: Russ Anderson Reviewed-by: Steve Wahl Reviewed-by: Kyle Meyer Acked-by: Borislav Petkov Link: https://lore.kernel.org/r/20200819143544.155096-2-alex.kluver@hpe.com Signed-off-by: Ard Biesheuvel --- include/linux/cper.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cper.h b/include/linux/cper.h index 8537e9282a65..bd2d8a77a784 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -230,6 +230,10 @@ enum { #define CPER_MEM_VALID_RANK_NUMBER 0x8000 #define CPER_MEM_VALID_CARD_HANDLE 0x10000 #define CPER_MEM_VALID_MODULE_HANDLE 0x20000 +#define CPER_MEM_VALID_ROW_EXT 0x40000 + +#define CPER_MEM_EXT_ROW_MASK 0x3 +#define CPER_MEM_EXT_ROW_SHIFT 16 #define CPER_PCIE_VALID_PORT_TYPE 0x0001 #define CPER_PCIE_VALID_VERSION 0x0002 @@ -443,7 +447,7 @@ struct cper_sec_mem_err_old { u8 error_type; }; -/* Memory Error Section (UEFI >= v2.3), UEFI v2.7 sec N.2.5 */ +/* Memory Error Section (UEFI >= v2.3), UEFI v2.8 sec N.2.5 */ struct cper_sec_mem_err { u64 validation_bits; u64 error_status; @@ -461,7 +465,7 @@ struct cper_sec_mem_err { u64 responder_id; u64 target_id; u8 error_type; - u8 reserved; + u8 extended; u16 rank; u16 mem_array_handle; /* "card handle" in UEFI 2.4 */ u16 mem_dev_handle; /* "module handle" in UEFI 2.4 */ @@ -483,8 +487,16 @@ struct cper_mem_err_compact { u16 rank; u16 mem_array_handle; u16 mem_dev_handle; + u8 extended; }; +static inline u32 cper_get_mem_extension(u64 mem_valid, u8 mem_extended) +{ + if (!(mem_valid & CPER_MEM_VALID_ROW_EXT)) + return 0; + return (mem_extended & CPER_MEM_EXT_ROW_MASK) << CPER_MEM_EXT_ROW_SHIFT; +} + /* PCI Express Error Section, UEFI v2.7 sec N.2.7 */ struct cper_sec_pcie { u64 validation_bits; -- cgit v1.2.3 From 612b5d506d066cdf0a739963e7cd28642d500ec1 Mon Sep 17 00:00:00 2001 From: Alex Kluver Date: Wed, 19 Aug 2020 09:35:44 -0500 Subject: cper,edac,efi: Memory Error Record: bank group/address and chip id Updates to the UEFI 2.8 Memory Error Record allow splitting the bank field into bank address and bank group, and using the last 3 bits of the extended field as a chip identifier. When needed, print correct version of bank field, bank group, and chip identification. Based on UEFI 2.8 Table 299. Memory Error Record. Signed-off-by: Alex Kluver Reviewed-by: Russ Anderson Reviewed-by: Kyle Meyer Reviewed-by: Steve Wahl Acked-by: Borislav Petkov Link: https://lore.kernel.org/r/20200819143544.155096-3-alex.kluver@hpe.com Signed-off-by: Ard Biesheuvel --- include/linux/cper.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cper.h b/include/linux/cper.h index bd2d8a77a784..6a511a1078ca 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -231,10 +231,18 @@ enum { #define CPER_MEM_VALID_CARD_HANDLE 0x10000 #define CPER_MEM_VALID_MODULE_HANDLE 0x20000 #define CPER_MEM_VALID_ROW_EXT 0x40000 +#define CPER_MEM_VALID_BANK_GROUP 0x80000 +#define CPER_MEM_VALID_BANK_ADDRESS 0x100000 +#define CPER_MEM_VALID_CHIP_ID 0x200000 #define CPER_MEM_EXT_ROW_MASK 0x3 #define CPER_MEM_EXT_ROW_SHIFT 16 +#define CPER_MEM_BANK_ADDRESS_MASK 0xff +#define CPER_MEM_BANK_GROUP_SHIFT 8 + +#define CPER_MEM_CHIP_ID_SHIFT 5 + #define CPER_PCIE_VALID_PORT_TYPE 0x0001 #define CPER_PCIE_VALID_VERSION 0x0002 #define CPER_PCIE_VALID_COMMAND_STATUS 0x0004 -- cgit v1.2.3 From 639bf4415cadff4c18e13aa5cb0dba2d443e3aa7 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 17 Sep 2020 12:02:21 +0300 Subject: net/mlx5: Refactor query port speed functions The functions mlx5_query_port_link_width_oper and mlx5_query_port_ib_proto_oper are always called together, so combine them to a new function called mlx5_query_port_oper to avoid duplication. And while the mlx5i_get_port_settings is the same as mlx5_query_port_oper therefore let's remove it. According to the IB spec link_width_oper and ib_proto_oper should be u16 and not as written u8, so perform casting as a preparation to cross-RDMA patch which will fix that type for all drivers in the RDMA subsystem. Fixes: ada68c31ba9c ("net/mlx5: Introduce a new header file for physical port functions") Signed-off-by: Aharon Landau Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 2d45a6af52a4..4d33ae0c2d97 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -133,10 +133,9 @@ enum mlx5e_connector_type { int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, - u8 *link_width_oper, u8 local_port); -int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, u8 local_port); + +int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, + u16 *proto_oper, u8 local_port); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); -- cgit v1.2.3 From e27014bdb47eb435f78573685f4196c07329f1f7 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 17 Sep 2020 12:02:22 +0300 Subject: RDMA/mlx5: Delete duplicated mlx5_ptys_width enum Combine two same enums to avoid duplication. Signed-off-by: Aharon Landau Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 4d33ae0c2d97..23edd2db4803 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -125,6 +125,14 @@ enum mlx5e_connector_type { MLX5E_CONNECTOR_TYPE_NUMBER, }; +enum mlx5_ptys_width { + MLX5_PTYS_WIDTH_1X = 1 << 0, + MLX5_PTYS_WIDTH_2X = 1 << 1, + MLX5_PTYS_WIDTH_4X = 1 << 2, + MLX5_PTYS_WIDTH_8X = 1 << 3, + MLX5_PTYS_WIDTH_12X = 1 << 4, +}; + #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ -- cgit v1.2.3 From f959dcd6ddfd29235030e8026471ac1b022ad2b0 Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Thu, 17 Sep 2020 18:43:03 +0200 Subject: dma-direct: Fix potential NULL pointer dereference When booting the kernel v5.9-rc4 on a VM, the kernel would panic when printing a warning message in swiotlb_map(). The dev->dma_mask must not be a NULL pointer when calling the dma mapping layer. A NULL pointer check can potentially avoid the panic. Signed-off-by: Thomas Tai Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 805010ea5346..2929685e88aa 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -62,9 +62,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, { dma_addr_t end = addr + size - 1; - if (!dev->dma_mask) - return false; - if (is_ram && !IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; -- cgit v1.2.3 From e0d072782c734d27f5af062c62266f2598f68542 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Thu, 17 Sep 2020 18:43:40 +0200 Subject: dma-mapping: introduce DMA range map, supplanting dma_pfn_offset The new field 'dma_range_map' in struct device is used to facilitate the use of single or multiple offsets between mapping regions of cpu addrs and dma addrs. It subsumes the role of "dev->dma_pfn_offset" which was only capable of holding a single uniform offset and had no region bounds checking. The function of_dma_get_range() has been modified so that it takes a single argument -- the device node -- and returns a map, NULL, or an error code. The map is an array that holds the information regarding the DMA regions. Each range entry contains the address offset, the cpu_start address, the dma_start address, and the size of the region. of_dma_configure() is the typical manner to set range offsets but there are a number of ad hoc assignments to "dev->dma_pfn_offset" in the kernel driver code. These cases now invoke the function dma_direct_set_offset(dev, cpu_addr, dma_addr, size). Signed-off-by: Jim Quinlan [hch: various interface cleanups] Signed-off-by: Christoph Hellwig Reviewed-by: Mathieu Poirier Tested-by: Mathieu Poirier Tested-by: Nathan Chancellor --- include/linux/device.h | 4 ++-- include/linux/dma-direct.h | 53 ++++++++++++++++++++++++++++++++++++++++----- include/linux/dma-mapping.h | 9 +++++++- 3 files changed, 57 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index ca18da4768e3..1c78621fc3c0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -466,7 +466,7 @@ struct dev_links_info { * such descriptors. * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller * DMA limit than the device itself supports. - * @dma_pfn_offset: offset of DMA memory range relatively of RAM + * @dma_range_map: map for DMA memory ranges relative to that of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. * @dma_pools: Dma pools (if dma'ble device). @@ -561,7 +561,7 @@ struct device { 64 bit addresses for consistent allocations such descriptors. */ u64 bus_dma_limit; /* upstream dma constraint */ - unsigned long dma_pfn_offset; + const struct bus_dma_region *dma_range_map; struct device_dma_parameters *dma_parms; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 2929685e88aa..83f797e0cb78 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -14,6 +14,41 @@ extern unsigned int zone_dma_bits; +/* + * Record the mapping of CPU physical to DMA addresses for a given region. + */ +struct bus_dma_region { + phys_addr_t cpu_start; + dma_addr_t dma_start; + u64 size; + u64 offset; +}; + +static inline dma_addr_t translate_phys_to_dma(struct device *dev, + phys_addr_t paddr) +{ + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; m->size; m++) + if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size) + return (dma_addr_t)paddr - m->offset; + + /* make sure dma_capable fails when no translation is available */ + return DMA_MAPPING_ERROR; +} + +static inline phys_addr_t translate_dma_to_phys(struct device *dev, + dma_addr_t dma_addr) +{ + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; m->size; m++) + if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size) + return (phys_addr_t)dma_addr + m->offset; + + return (phys_addr_t)-1; +} + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #ifndef phys_to_dma_unencrypted @@ -23,9 +58,9 @@ extern unsigned int zone_dma_bits; static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, phys_addr_t paddr) { - dma_addr_t dev_addr = (dma_addr_t)paddr; - - return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + if (dev->dma_range_map) + return translate_phys_to_dma(dev, paddr); + return paddr; } /* @@ -39,10 +74,14 @@ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return __sme_set(phys_to_dma_unencrypted(dev, paddr)); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { - phys_addr_t paddr = (phys_addr_t)dev_addr + - ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + phys_addr_t paddr; + + if (dev->dma_range_map) + paddr = translate_dma_to_phys(dev, dma_addr); + else + paddr = dma_addr; return __sme_clr(paddr); } @@ -62,6 +101,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, { dma_addr_t end = addr + size - 1; + if (addr == DMA_MAPPING_ERROR) + return false; if (is_ram && !IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index df0bff2ea750..bb138ac6f5e6 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -730,4 +730,11 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif -#endif +/* + * Legacy interface to set up the dma offset map. Drivers really should not + * actually use it, but we have a few legacy cases left. + */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size); + +#endif /* _LINUX_DMA_MAPPING_H */ -- cgit v1.2.3 From 527c412519eb63ed354790f4291c3728815d11a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2020 09:41:58 +0200 Subject: compat: add a compat_need_64bit_alignment_fixup() helper Add a helper to check if the calling syscall needs a fixup for non-natural 64-bit type alignment in the compat ABI. This will only return true for i386 syscalls on x86_64. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index d38c4d7e83bd..f0026a344482 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -932,6 +932,15 @@ static inline bool in_compat_syscall(void) { return false; } #endif /* CONFIG_COMPAT */ +/* + * Some legacy ABIs like the i386 one use less than natural alignment for 64-bit + * types, and will need special compat treatment for that. Most architectures + * don't need that special handling even for compat syscalls. + */ +#ifndef compat_need_64bit_alignment_fixup +#define compat_need_64bit_alignment_fixup() false +#endif + /* * A pointer passed in from user mode. This should not * be used for syscall parameters, just declare them -- cgit v1.2.3 From 80bdad3d7e3ec03f812471d9309f5f682e10f52b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2020 09:41:59 +0200 Subject: quota: simplify the quotactl compat handling Fold the misaligned u64 workarounds into the main quotactl flow instead of implementing a separate compat syscall handler. Signed-off-by: Christoph Hellwig Acked-by: Jan Kara Signed-off-by: Al Viro --- include/linux/quotaops.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 9cf0cd3dc88c..a0f6668924d3 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -27,9 +27,6 @@ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid)); } -int kernel_quotactl(unsigned int cmd, const char __user *special, - qid_t id, void __user *addr); - #if defined(CONFIG_QUOTA) #define quota_error(sb, fmt, args...) \ -- cgit v1.2.3 From c7b6bac9c72c5fcbd6e9e12545bd3022c7f21860 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 15 Sep 2020 09:30:05 -0700 Subject: drm, iommu: Change type of pasid to u32 PASID is defined as a few different types in iommu including "int", "u32", and "unsigned int". To be consistent and to match with uapi definitions, define PASID and its variations (e.g. max PASID) as "u32". "u32" is also shorter and a little more explicit than "unsigned int". No PASID type change in uapi although it defines PASID as __u64 in some places. Suggested-by: Thomas Gleixner Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Lu Baolu Acked-by: Felix Kuehling Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/1600187413-163670-2-git-send-email-fenghua.yu@intel.com --- include/linux/amd-iommu.h | 8 ++++---- include/linux/intel-iommu.h | 12 ++++++------ include/linux/intel-svm.h | 2 +- include/linux/iommu.h | 10 +++++----- include/linux/uacce.h | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 21e950e4ab62..450717299928 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -76,7 +76,7 @@ extern void amd_iommu_free_device(struct pci_dev *pdev); * * The function returns 0 on success or a negative value on error. */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, +extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, struct task_struct *task); /** @@ -88,7 +88,7 @@ extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, * When this function returns the device is no longer using the PASID * and the PASID is no longer bound to its task. */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid); +extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); /** * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed @@ -114,7 +114,7 @@ extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid); #define AMD_IOMMU_INV_PRI_RSP_FAIL 2 typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - int pasid, + u32 pasid, unsigned long address, u16); @@ -166,7 +166,7 @@ extern int amd_iommu_device_info(struct pci_dev *pdev, * @cb: The call-back function */ -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid); +typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, amd_iommu_invalidate_ctx cb); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b1ed2f25f7c0..7322073f62d0 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -549,7 +549,7 @@ struct dmar_domain { 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ u64 max_addr; /* maximum mapped address */ - int default_pasid; /* + u32 default_pasid; /* * The default pasid used for non-SVM * traffic on mediated devices. */ @@ -708,7 +708,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order); void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, - int pasid); + u32 pasid); int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, unsigned int count, unsigned long options); @@ -737,11 +737,11 @@ extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct iommu_gpasid_bind_data *data); -int intel_svm_unbind_gpasid(struct device *dev, int pasid); +int intel_svm_unbind_gpasid(struct device *dev, u32 pasid); struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata); void intel_svm_unbind(struct iommu_sva *handle); -int intel_svm_get_pasid(struct iommu_sva *handle); +u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); @@ -753,7 +753,7 @@ struct intel_svm_dev { struct device *dev; struct svm_dev_ops *ops; struct iommu_sva sva; - int pasid; + u32 pasid; int users; u16 did; u16 dev_iotlb:1; @@ -766,7 +766,7 @@ struct intel_svm { struct intel_iommu *iommu; int flags; - int pasid; + u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; struct list_head list; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index c9e7e601950d..39d368a810b8 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -11,7 +11,7 @@ struct device; struct svm_dev_ops { - void (*fault_cb)(struct device *dev, int pasid, u64 address, + void (*fault_cb)(struct device *dev, u32 pasid, u64 address, void *private, int rwxp, int response); }; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fee209efb756..e57e819aaf2e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -286,7 +286,7 @@ struct iommu_ops { struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, void *drvdata); void (*sva_unbind)(struct iommu_sva *handle); - int (*sva_get_pasid)(struct iommu_sva *handle); + u32 (*sva_get_pasid)(struct iommu_sva *handle); int (*page_response)(struct device *dev, struct iommu_fault_event *evt, @@ -296,7 +296,7 @@ struct iommu_ops { int (*sva_bind_gpasid)(struct iommu_domain *domain, struct device *dev, struct iommu_gpasid_bind_data *data); - int (*sva_unbind_gpasid)(struct device *dev, int pasid); + int (*sva_unbind_gpasid)(struct device *dev, u32 pasid); int (*def_domain_type)(struct device *dev); @@ -634,7 +634,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata); void iommu_sva_unbind_device(struct iommu_sva *handle); -int iommu_sva_get_pasid(struct iommu_sva *handle); +u32 iommu_sva_get_pasid(struct iommu_sva *handle); #else /* CONFIG_IOMMU_API */ @@ -1027,7 +1027,7 @@ static inline void iommu_sva_unbind_device(struct iommu_sva *handle) { } -static inline int iommu_sva_get_pasid(struct iommu_sva *handle) +static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) { return IOMMU_PASID_INVALID; } @@ -1046,7 +1046,7 @@ static inline int iommu_sva_bind_gpasid(struct iommu_domain *domain, } static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, int pasid) + struct device *dev, u32 pasid) { return -ENODEV; } diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 454c2f6672d7..48e319f40275 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -81,7 +81,7 @@ struct uacce_queue { struct list_head list; struct uacce_qfile_region *qfrs[UACCE_MAX_REGION]; enum uacce_q_state state; - int pasid; + u32 pasid; struct iommu_sva *handle; }; -- cgit v1.2.3 From 2a5054c6e7b16906984ac36a7363ca46b8d99ade Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 15 Sep 2020 09:30:06 -0700 Subject: iommu/vt-d: Change flags type to unsigned int in binding mm "flags" passed to intel_svm_bind_mm() is a bit mask and should be defined as "unsigned int" instead of "int". Change its type to "unsigned int". Suggested-by: Thomas Gleixner Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Lu Baolu Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/1600187413-163670-3-git-send-email-fenghua.yu@intel.com --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 7322073f62d0..9c3e8337442a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -765,7 +765,7 @@ struct intel_svm { struct mm_struct *mm; struct intel_iommu *iommu; - int flags; + unsigned int flags; u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; -- cgit v1.2.3 From 877c1a5f79c6984bbe3f2924234c08e2f4f1acd5 Mon Sep 17 00:00:00 2001 From: Tuan Phan Date: Thu, 6 Aug 2020 14:57:34 -0700 Subject: PCI/ACPI: Add Ampere Altra SOC MCFG quirk Ampere Altra SOC supports only 32-bit ECAM reads. Add an MCFG quirk for the platform. Link: https://lore.kernel.org/r/1596751055-12316-1-git-send-email-tuanphan@os.amperecomputing.com Signed-off-by: Tuan Phan Signed-off-by: Bjorn Helgaas --- include/linux/pci-ecam.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 1af5cb02ef7f..033ce74f02e8 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -51,6 +51,7 @@ extern const struct pci_ecam_ops pci_generic_ecam_ops; #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) extern const struct pci_ecam_ops pci_32b_ops; /* 32-bit accesses only */ +extern const struct pci_ecam_ops pci_32b_read_ops; /* 32-bit read only */ extern const struct pci_ecam_ops hisi_pcie_ops; /* HiSilicon */ extern const struct pci_ecam_ops thunder_pem_ecam_ops; /* Cavium ThunderX 1.x & 2.x */ extern const struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */ -- cgit v1.2.3 From 9d8feb460adb90e89e64101ce2c1cfcd548a6d83 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 31 Aug 2020 10:01:48 +0300 Subject: RDMA/mlx5: Add sw_owner_v2 bit capability Added sw_owner_v2 which will be enabled for future devices, replacing sw_owner bit. Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index de1ffb4804d6..3061ceebbaf3 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -420,7 +420,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_1a[0x2]; u8 ipsec_encrypt[0x1]; u8 ipsec_decrypt[0x1]; - u8 reserved_at_1e[0x2]; + u8 sw_owner_v2[0x1]; + u8 reserved_at_1f[0x1]; u8 termination_table_raw_traffic[0x1]; u8 reserved_at_21[0x1]; -- cgit v1.2.3 From 52ad9bc64c74167466e70e0df4b99ee5ccef0078 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 15 Sep 2020 09:30:11 -0700 Subject: mm: Add a pasid member to struct mm_struct A PASID is shared by all threads in a process. So the logical place to keep track of it is in the mm_struct. Both ARM and x86 would use this PASID. [ bp: Massage commit message. ] Suggested-by: Christoph Hellwig Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Link: https://lkml.kernel.org/r/1600187413-163670-8-git-send-email-fenghua.yu@intel.com --- include/linux/mm_types.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 496c3ff97cce..1ff0615ef19f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -542,6 +542,10 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; + +#ifdef CONFIG_IOMMU_SUPPORT + u32 pasid; +#endif } __randomize_layout; /* -- cgit v1.2.3 From a748c6975dea325da540610c2ba9b5f332c603e6 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 16 Sep 2020 23:10:05 +0200 Subject: bpf: propagate poke descriptors to subprograms Previously, there was no need for poke descriptors being present in subprogram's bpf_prog_aux struct since tailcalls were simply not allowed in them. Each subprog is JITed independently so in order to enable JITing subprograms that use tailcalls, do the following: - in fixup_bpf_calls() store the index of tailcall insn onto the generated poke descriptor, - in case when insn patching occurs, adjust the tailcall insn idx from bpf_patch_insn_data, - then in jit_subprogs() check whether the given poke descriptor belongs to the current subprog by checking if that previously stored absolute index of tail call insn is in the scope of the insns of given subprog, - update the insn->imm with new poke descriptor slot so that while JITing the proper poke descriptor will be grabbed This way each of the main program's poke descriptors are distributed across the subprograms poke descriptor array, so main program's descriptors can be untracked out of the prog array map. Add also subprog's aux struct to the BPF map poke_progs list by calling on it map_poke_track(). In case of any error, call the map_poke_untrack() on subprog's aux structs that have already been registered to prog array map. Signed-off-by: Maciej Fijalkowski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5dcce0364634..a23e5eb728c8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -707,6 +707,7 @@ struct bpf_jit_poke_descriptor { bool ip_stable; u8 adj_off; u16 reason; + u32 insn_idx; }; /* reg_type info for ctx arguments */ -- cgit v1.2.3 From cf71b174d3464c7dc22f86f25d629a8d9d5c3519 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 16 Sep 2020 23:10:06 +0200 Subject: bpf: rename poke descriptor's 'ip' member to 'tailcall_target' Reflect the actual purpose of poke->ip and rename it to poke->tailcall_target so that it will not the be confused with another poke target that will be introduced in next commit. While at it, do the same thing with poke->ip_stable - rename it to poke->tailcall_target_stable. Signed-off-by: Maciej Fijalkowski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a23e5eb728c8..f3790c9cf542 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -697,14 +697,14 @@ enum bpf_jit_poke_reason { /* Descriptor of pokes pointing /into/ the JITed image. */ struct bpf_jit_poke_descriptor { - void *ip; + void *tailcall_target; union { struct { struct bpf_map *map; u32 key; } tail_call; }; - bool ip_stable; + bool tailcall_target_stable; u8 adj_off; u16 reason; u32 insn_idx; -- cgit v1.2.3 From 32b313ce9db54991a053da66883289e99d6ad820 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 17 Sep 2020 23:30:27 +0200 Subject: PCI: endpoint: Use "NULL" instead of "0" as a NULL pointer When returning a NULL pointer, use "NULL" instead of "0". Fixes sparse warning given by executing "make C=2 drivers/pci/": CHECK drivers/pci/endpoint/pci-epc-core.c drivers/pci/endpoint/pci-epc-core.c: note: in included file: ./include/linux/pci-ep-cfs.h:22:16: warning: Using plain integer as NULL pointer CHECK drivers/pci/endpoint/pci-epf-core.c drivers/pci/endpoint/pci-epf-core.c: note: in included file: ./include/linux/pci-ep-cfs.h:31:16: warning: Using plain integer as NULL pointer Link: https://lore.kernel.org/r/80895f7465719edb3aa259e907acc4bc3217945c.1600378209.git.gustavo.pimentel@synopsys.com Reported-by: Bjorn Helgaas Signed-off-by: Gustavo Pimentel Signed-off-by: Bjorn Helgaas Cc: Kishon Vijay Abraham I Cc: Joao Pinto --- include/linux/pci-ep-cfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-ep-cfs.h b/include/linux/pci-ep-cfs.h index f42b0fd4b4bc..662881335c7e 100644 --- a/include/linux/pci-ep-cfs.h +++ b/include/linux/pci-ep-cfs.h @@ -19,7 +19,7 @@ void pci_ep_cfs_remove_epf_group(struct config_group *group); #else static inline struct config_group *pci_ep_cfs_add_epc_group(const char *name) { - return 0; + return NULL; } static inline void pci_ep_cfs_remove_epc_group(struct config_group *group) @@ -28,7 +28,7 @@ static inline void pci_ep_cfs_remove_epc_group(struct config_group *group) static inline struct config_group *pci_ep_cfs_add_epf_group(const char *name) { - return 0; + return NULL; } static inline void pci_ep_cfs_remove_epf_group(struct config_group *group) -- cgit v1.2.3 From 2492c205d2bbbc01f5c9e49fffe4b2e633c33f38 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 17 Sep 2020 10:19:10 +0800 Subject: netdev: Remove unused functions There is no callers in tree, so can remove it. Signed-off-by: YueHaibing Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 157e0242e9ee..909b1fbb0481 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4677,16 +4677,6 @@ int netdev_class_create_file_ns(const struct class_attribute *class_attr, void netdev_class_remove_file_ns(const struct class_attribute *class_attr, const void *ns); -static inline int netdev_class_create_file(const struct class_attribute *class_attr) -{ - return netdev_class_create_file_ns(class_attr, NULL); -} - -static inline void netdev_class_remove_file(const struct class_attribute *class_attr) -{ - netdev_class_remove_file_ns(class_attr, NULL); -} - extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); -- cgit v1.2.3 From 7f6e4312e15a5c370e84eaa685879b6bdcc717e4 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 16 Sep 2020 23:10:07 +0200 Subject: bpf: Limit caller's stack depth 256 for subprogs with tailcalls Protect against potential stack overflow that might happen when bpf2bpf calls get combined with tailcalls. Limit the caller's stack depth for such case down to 256 so that the worst case scenario would result in 8k stack size (32 which is tailcall limit * 256 = 8k). Suggested-by: Alexei Starovoitov Signed-off-by: Maciej Fijalkowski Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 53c7bd568c5d..5026b75db972 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -358,6 +358,7 @@ struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ + bool has_tail_call; }; /* single container for all structs -- cgit v1.2.3 From ebf7d1f508a73871acf3b2bfbfa1323a477acdb3 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 16 Sep 2020 23:10:08 +0200 Subject: bpf, x64: rework pro/epilogue and tailcall handling in JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit serves two things: 1) it optimizes BPF prologue/epilogue generation 2) it makes possible to have tailcalls within BPF subprogram Both points are related to each other since without 1), 2) could not be achieved. In [1], Alexei says: "The prologue will look like: nop5 xor eax,eax  // two new bytes if bpf_tail_call() is used in this // function push rbp mov rbp, rsp sub rsp, rounded_stack_depth push rax // zero init tail_call counter variable number of push rbx,r13,r14,r15 Then bpf_tail_call will pop variable number rbx,.. and final 'pop rax' Then 'add rsp, size_of_current_stack_frame' jmp to next function and skip over 'nop5; xor eax,eax; push rpb; mov rbp, rsp' This way new function will set its own stack size and will init tail call counter with whatever value the parent had. If next function doesn't use bpf_tail_call it won't have 'xor eax,eax'. Instead it would need to have 'nop2' in there." Implement that suggestion. Since the layout of stack is changed, tail call counter handling can not rely anymore on popping it to rbx just like it have been handled for constant prologue case and later overwrite of rbx with actual value of rbx pushed to stack. Therefore, let's use one of the register (%rcx) that is considered to be volatile/caller-saved and pop the value of tail call counter in there in the epilogue. Drop the BUILD_BUG_ON in emit_prologue and in emit_bpf_tail_call_indirect where instruction layout is not constant anymore. Introduce new poke target, 'tailcall_bypass' to poke descriptor that is dedicated for skipping the register pops and stack unwind that are generated right before the actual jump to target program. For case when the target program is not present, BPF program will skip the pop instructions and nop5 dedicated for jmpq $target. An example of such state when only R6 of callee saved registers is used by program: ffffffffc0513aa1: e9 0e 00 00 00 jmpq 0xffffffffc0513ab4 ffffffffc0513aa6: 5b pop %rbx ffffffffc0513aa7: 58 pop %rax ffffffffc0513aa8: 48 81 c4 00 00 00 00 add $0x0,%rsp ffffffffc0513aaf: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) ffffffffc0513ab4: 48 89 df mov %rbx,%rdi When target program is inserted, the jump that was there to skip pops/nop5 will become the nop5, so CPU will go over pops and do the actual tailcall. One might ask why there simply can not be pushes after the nop5? In the following example snippet: ffffffffc037030c: 48 89 fb mov %rdi,%rbx (...) ffffffffc0370332: 5b pop %rbx ffffffffc0370333: 58 pop %rax ffffffffc0370334: 48 81 c4 00 00 00 00 add $0x0,%rsp ffffffffc037033b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) ffffffffc0370340: 48 81 ec 00 00 00 00 sub $0x0,%rsp ffffffffc0370347: 50 push %rax ffffffffc0370348: 53 push %rbx ffffffffc0370349: 48 89 df mov %rbx,%rdi ffffffffc037034c: e8 f7 21 00 00 callq 0xffffffffc0372548 There is the bpf2bpf call (at ffffffffc037034c) right after the tailcall and jump target is not present. ctx is in %rbx register and BPF subprogram that we will call into on ffffffffc037034c is relying on it, e.g. it will pick ctx from there. Such code layout is therefore broken as we would overwrite the content of %rbx with the value that was pushed on the prologue. That is the reason for the 'bypass' approach. Special care needs to be taken during the install/update/remove of tailcall target. In case when target program is not present, the CPU must not execute the pop instructions that precede the tailcall. To address that, the following states can be defined: A nop, unwind, nop B nop, unwind, tail C skip, unwind, nop D skip, unwind, tail A is forbidden (lead to incorrectness). The state transitions between tailcall install/update/remove will work as follows: First install tail call f: C->D->B(f) * poke the tailcall, after that get rid of the skip Update tail call f to f': B(f)->B(f') * poke the tailcall (poke->tailcall_target) and do NOT touch the poke->tailcall_bypass Remove tail call: B(f')->C(f') * poke->tailcall_bypass is poked back to jump, then we wait the RCU grace period so that other programs will finish its execution and after that we are safe to remove the poke->tailcall_target Install new tail call (f''): C(f')->D(f'')->B(f''). * same as first step This way CPU can never be exposed to "unwind, tail" state. Last but not least, when tailcalls get mixed with bpf2bpf calls, it would be possible to encounter the endless loop due to clearing the tailcall counter if for example we would use the tailcall3-like from BPF selftests program that would be subprogram-based, meaning the tailcall would be present within the BPF subprogram. This test, broken down to particular steps, would do: entry -> set tailcall counter to 0, bump it by 1, tailcall to func0 func0 -> call subprog_tail (we are NOT skipping the first 11 bytes of prologue and this subprogram has a tailcall, therefore we clear the counter...) subprog -> do the same thing as entry and then loop forever. To address this, the idea is to go through the call chain of bpf2bpf progs and look for a tailcall presence throughout whole chain. If we saw a single tail call then each node in this call chain needs to be marked as a subprog that can reach the tailcall. We would later feed the JIT with this info and: - set eax to 0 only when tailcall is reachable and this is the entry prog - if tailcall is reachable but there's no tailcall in insns of currently JITed prog then push rax anyway, so that it will be possible to propagate further down the call chain - finally if tailcall is reachable, then we need to precede the 'call' insn with mov rax, [rbp - (stack_depth + 8)] Tail call related cases from test_verifier kselftest are also working fine. Sample BPF programs that utilize tail calls (sockex3, tracex5) work properly as well. [1]: https://lore.kernel.org/bpf/20200517043227.2gpq22ifoq37ogst@ast-mbp.dhcp.thefacebook.com/ Suggested-by: Alexei Starovoitov Signed-off-by: Maciej Fijalkowski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ include/linux/bpf_verifier.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f3790c9cf542..d7c5a6ed87e3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -698,6 +698,8 @@ enum bpf_jit_poke_reason { /* Descriptor of pokes pointing /into/ the JITed image. */ struct bpf_jit_poke_descriptor { void *tailcall_target; + void *tailcall_bypass; + void *bypass_addr; union { struct { struct bpf_map *map; @@ -738,6 +740,7 @@ struct bpf_prog_aux { bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; bool sleepable; + bool tail_call_reachable; enum bpf_tramp_prog_type trampoline_prog_type; struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5026b75db972..fbc964526ba3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -359,6 +359,7 @@ struct bpf_subprog_info { u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ bool has_tail_call; + bool tail_call_reachable; }; /* single container for all structs -- cgit v1.2.3 From 09b28d76eac48e922dc293da1aa2b2b85c32aeee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 17 Sep 2020 19:09:18 -0700 Subject: bpf: Add abnormal return checks. LD_[ABS|IND] instructions may return from the function early. bpf_tail_call pseudo instruction is either fallthrough or return. Allow them in the subprograms only when subprograms are BTF annotated and have scalar return types. Allow ld_abs and tail_call in the main program even if it calls into subprograms. In the past that was not ok to do for ld_abs, since it was JITed with special exit sequence. Since bpf_gen_ld_abs() was introduced the ld_abs looks like normal exit insn from JIT point of view, so it's safe to allow them in the main program. Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index fbc964526ba3..2bb48a2c4d08 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -360,6 +360,7 @@ struct bpf_subprog_info { u16 stack_depth; /* max. stack depth used by this function */ bool has_tail_call; bool tail_call_reachable; + bool has_ld_abs; }; /* single container for all structs -- cgit v1.2.3 From cc80d10d6fcf6f47a2a3efe2c4139e50906a4764 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 25 Aug 2020 20:01:50 +0300 Subject: interconnect: Add get_bw() callback The interconnect controller hardware may support querying the current bandwidth settings, so add a callback for providers to implement this functionality if supported. Link: https://lore.kernel.org/r/20200825170152.6434-2-georgi.djakov@linaro.org Reviewed-by: Saravana Kannan Signed-off-by: Georgi Djakov --- include/linux/interconnect-provider.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 4735518de515..520f70fe5a31 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -38,6 +38,7 @@ struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec, * @aggregate: pointer to device specific aggregate operation function * @pre_aggregate: pointer to device specific function that is called * before the aggregation begins (optional) + * @get_bw: pointer to device specific function to get current bandwidth * @xlate: provider-specific callback for mapping nodes from phandle arguments * @dev: the device this interconnect provider belongs to * @users: count of active users @@ -51,6 +52,7 @@ struct icc_provider { int (*aggregate)(struct icc_node *node, u32 tag, u32 avg_bw, u32 peak_bw, u32 *agg_avg, u32 *agg_peak); void (*pre_aggregate)(struct icc_node *node); + int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak); struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); struct device *dev; int users; -- cgit v1.2.3 From b1d681d8d32499bcf284462d92aeb5f9fe72bf5b Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 25 Aug 2020 20:01:51 +0300 Subject: interconnect: Add sync state support The bootloaders often do some initial configuration of the interconnects in the system and we want to keep this configuration until all consumers have probed and expressed their bandwidth needs. This is because we don't want to change the configuration by starting to disable unused paths until every user had a chance to request the amount of bandwidth it needs. To accomplish this we will implement an interconnect specific sync_state callback which will synchronize (aggregate and set) the current bandwidth settings when all consumers have been probed. Link: https://lore.kernel.org/r/20200825170152.6434-3-georgi.djakov@linaro.org Reviewed-by: Saravana Kannan Signed-off-by: Georgi Djakov --- include/linux/interconnect-provider.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 520f70fe5a31..f713308b8a8f 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -75,6 +75,8 @@ struct icc_provider { * @req_list: a list of QoS constraint requests associated with this node * @avg_bw: aggregated value of average bandwidth requests from all consumers * @peak_bw: aggregated value of peak bandwidth requests from all consumers + * @init_avg: average bandwidth value that is read from the hardware during init + * @init_peak: peak bandwidth value that is read from the hardware during init * @data: pointer to private data */ struct icc_node { @@ -91,6 +93,8 @@ struct icc_node { struct hlist_head req_list; u32 avg_bw; u32 peak_bw; + u32 init_avg; + u32 init_peak; void *data; }; @@ -108,6 +112,7 @@ int icc_nodes_remove(struct icc_provider *provider); int icc_provider_add(struct icc_provider *provider); int icc_provider_del(struct icc_provider *provider); struct icc_node *of_icc_get_from_provider(struct of_phandle_args *spec); +void icc_sync_state(struct device *dev); #else -- cgit v1.2.3 From 37050e3ab0b3f02819e3d70ab01d97addb810b28 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Tue, 8 Sep 2020 12:03:02 -0700 Subject: ieee80211: redefine S1G bits with GENMASK The S1G capability fields were defined by ORing BITS() together, and expecting a custom macro to use the _SHIFT definitions. Use the Linux kernel GENMASK for the definitions now, and FIELD_{GET,PREP} to access the fields in the future. Take the chance to rename eg. S1G_CAPAB_B0 to the more compact S1G_CAP0. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200908190323.15814-2-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 156 +++++++++++++++++++++++----------------------- 1 file changed, 78 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c47f43e65a2f..53fba39d4ba6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2330,84 +2330,84 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) } /* S1G Capabilities Information field */ -#define S1G_CAPAB_B0_S1G_LONG BIT(0) -#define S1G_CAPAB_B0_SGI_1MHZ BIT(1) -#define S1G_CAPAB_B0_SGI_2MHZ BIT(2) -#define S1G_CAPAB_B0_SGI_4MHZ BIT(3) -#define S1G_CAPAB_B0_SGI_8MHZ BIT(4) -#define S1G_CAPAB_B0_SGI_16MHZ BIT(5) -#define S1G_CAPAB_B0_SUPP_CH_WIDTH_MASK (BIT(6) | BIT(7)) -#define S1G_CAPAB_B0_SUPP_CH_WIDTH_SHIFT 6 - -#define S1G_CAPAB_B1_RX_LDPC BIT(0) -#define S1G_CAPAB_B1_TX_STBC BIT(1) -#define S1G_CAPAB_B1_RX_STBC BIT(2) -#define S1G_CAPAB_B1_SU_BFER BIT(3) -#define S1G_CAPAB_B1_SU_BFEE BIT(4) -#define S1G_CAPAB_B1_BFEE_STS_MASK (BIT(5) | BIT(6) | BIT(7)) -#define S1G_CAPAB_B1_BFEE_STS_SHIFT 5 - -#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_MASK (BIT(0) | BIT(1) | BIT(2)) -#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_SHIFT 0 -#define S1G_CAPAB_B2_MU_BFER BIT(3) -#define S1G_CAPAB_B2_MU_BFEE BIT(4) -#define S1G_CAPAB_B2_PLUS_HTC_VHT BIT(5) -#define S1G_CAPAB_B2_TRAVELING_PILOT_MASK (BIT(6) | BIT(7)) -#define S1G_CAPAB_B2_TRAVELING_PILOT_SHIFT 6 - -#define S1G_CAPAB_B3_RD_RESPONDER BIT(0) -#define S1G_CAPAB_B3_HT_DELAYED_BA BIT(1) -#define S1G_CAPAB_B3_MAX_MPDU_LEN BIT(2) -#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_MASK (BIT(3) | BIT(4)) -#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_SHIFT 3 -#define S1G_CAPAB_B3_MIN_MPDU_START_MASK (BIT(5) | BIT(6) | BIT(7)) -#define S1G_CAPAB_B3_MIN_MPDU_START_SHIFT 5 - -#define S1G_CAPAB_B4_UPLINK_SYNC BIT(0) -#define S1G_CAPAB_B4_DYNAMIC_AID BIT(1) -#define S1G_CAPAB_B4_BAT BIT(2) -#define S1G_CAPAB_B4_TIME_ADE BIT(3) -#define S1G_CAPAB_B4_NON_TIM BIT(4) -#define S1G_CAPAB_B4_GROUP_AID BIT(5) -#define S1G_CAPAB_B4_STA_TYPE_MASK (BIT(6) | BIT(7)) -#define S1G_CAPAB_B4_STA_TYPE_SHIFT 6 - -#define S1G_CAPAB_B5_CENT_AUTH_CONTROL BIT(0) -#define S1G_CAPAB_B5_DIST_AUTH_CONTROL BIT(1) -#define S1G_CAPAB_B5_AMSDU BIT(2) -#define S1G_CAPAB_B5_AMPDU BIT(3) -#define S1G_CAPAB_B5_ASYMMETRIC_BA BIT(4) -#define S1G_CAPAB_B5_FLOW_CONTROL BIT(5) -#define S1G_CAPAB_B5_SECTORIZED_BEAM_MASK (BIT(6) | BIT(7)) -#define S1G_CAPAB_B5_SECTORIZED_BEAM_SHIFT 6 - -#define S1G_CAPAB_B6_OBSS_MITIGATION BIT(0) -#define S1G_CAPAB_B6_FRAGMENT_BA BIT(1) -#define S1G_CAPAB_B6_NDP_PS_POLL BIT(2) -#define S1G_CAPAB_B6_RAW_OPERATION BIT(3) -#define S1G_CAPAB_B6_PAGE_SLICING BIT(4) -#define S1G_CAPAB_B6_TXOP_SHARING_IMP_ACK BIT(5) -#define S1G_CAPAB_B6_VHT_LINK_ADAPT_MASK (BIT(6) | BIT(7)) -#define S1G_CAPAB_B6_VHT_LINK_ADAPT_SHIFT 6 - -#define S1G_CAPAB_B7_TACK_AS_PS_POLL BIT(0) -#define S1G_CAPAB_B7_DUP_1MHZ BIT(1) -#define S1G_CAPAB_B7_MCS_NEGOTIATION BIT(2) -#define S1G_CAPAB_B7_1MHZ_CTL_RESPONSE_PREAMBLE BIT(3) -#define S1G_CAPAB_B7_NDP_BFING_REPORT_POLL BIT(4) -#define S1G_CAPAB_B7_UNSOLICITED_DYN_AID BIT(5) -#define S1G_CAPAB_B7_SECTOR_TRAINING_OPERATION BIT(6) -#define S1G_CAPAB_B7_TEMP_PS_MODE_SWITCH BIT(7) - -#define S1G_CAPAB_B8_TWT_GROUPING BIT(0) -#define S1G_CAPAB_B8_BDT BIT(1) -#define S1G_CAPAB_B8_COLOR_MASK (BIT(2) | BIT(3) | BIT(4)) -#define S1G_CAPAB_B8_COLOR_SHIFT 2 -#define S1G_CAPAB_B8_TWT_REQUEST BIT(5) -#define S1G_CAPAB_B8_TWT_RESPOND BIT(6) -#define S1G_CAPAB_B8_PV1_FRAME BIT(7) - -#define S1G_CAPAB_B9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0) +#define S1G_CAP0_S1G_LONG BIT(0) +#define S1G_CAP0_SGI_1MHZ BIT(1) +#define S1G_CAP0_SGI_2MHZ BIT(2) +#define S1G_CAP0_SGI_4MHZ BIT(3) +#define S1G_CAP0_SGI_8MHZ BIT(4) +#define S1G_CAP0_SGI_16MHZ BIT(5) +#define S1G_CAP0_SUPP_CH_WIDTH GENMASK(7, 6) + +#define S1G_SUPP_CH_WIDTH_2 0 +#define S1G_SUPP_CH_WIDTH_4 1 +#define S1G_SUPP_CH_WIDTH_8 2 +#define S1G_SUPP_CH_WIDTH_16 3 +#define S1G_SUPP_CH_WIDTH_MAX(cap) ((1 << FIELD_GET(S1G_CAP0_SUPP_CH_WIDTH, \ + cap[0])) << 1) + +#define S1G_CAP1_RX_LDPC BIT(0) +#define S1G_CAP1_TX_STBC BIT(1) +#define S1G_CAP1_RX_STBC BIT(2) +#define S1G_CAP1_SU_BFER BIT(3) +#define S1G_CAP1_SU_BFEE BIT(4) +#define S1G_CAP1_BFEE_STS GENMASK(7, 5) + +#define S1G_CAP2_SOUNDING_DIMENSIONS GENMASK(2, 0) +#define S1G_CAP2_MU_BFER BIT(3) +#define S1G_CAP2_MU_BFEE BIT(4) +#define S1G_CAP2_PLUS_HTC_VHT BIT(5) +#define S1G_CAP2_TRAVELING_PILOT GENMASK(7, 6) + +#define S1G_CAP3_RD_RESPONDER BIT(0) +#define S1G_CAP3_HT_DELAYED_BA BIT(1) +#define S1G_CAP3_MAX_MPDU_LEN BIT(2) +#define S1G_CAP3_MAX_AMPDU_LEN_EXP GENMASK(4, 3) +#define S1G_CAP3_MIN_MPDU_START GENMASK(7, 5) + +#define S1G_CAP4_UPLINK_SYNC BIT(0) +#define S1G_CAP4_DYNAMIC_AID BIT(1) +#define S1G_CAP4_BAT BIT(2) +#define S1G_CAP4_TIME_ADE BIT(3) +#define S1G_CAP4_NON_TIM BIT(4) +#define S1G_CAP4_GROUP_AID BIT(5) +#define S1G_CAP4_STA_TYPE GENMASK(7, 6) + +#define S1G_CAP5_CENT_AUTH_CONTROL BIT(0) +#define S1G_CAP5_DIST_AUTH_CONTROL BIT(1) +#define S1G_CAP5_AMSDU BIT(2) +#define S1G_CAP5_AMPDU BIT(3) +#define S1G_CAP5_ASYMMETRIC_BA BIT(4) +#define S1G_CAP5_FLOW_CONTROL BIT(5) +#define S1G_CAP5_SECTORIZED_BEAM GENMASK(7, 6) + +#define S1G_CAP6_OBSS_MITIGATION BIT(0) +#define S1G_CAP6_FRAGMENT_BA BIT(1) +#define S1G_CAP6_NDP_PS_POLL BIT(2) +#define S1G_CAP6_RAW_OPERATION BIT(3) +#define S1G_CAP6_PAGE_SLICING BIT(4) +#define S1G_CAP6_TXOP_SHARING_IMP_ACK BIT(5) +#define S1G_CAP6_VHT_LINK_ADAPT GENMASK(7, 6) + +#define S1G_CAP7_TACK_AS_PS_POLL BIT(0) +#define S1G_CAP7_DUP_1MHZ BIT(1) +#define S1G_CAP7_MCS_NEGOTIATION BIT(2) +#define S1G_CAP7_1MHZ_CTL_RESPONSE_PREAMBLE BIT(3) +#define S1G_CAP7_NDP_BFING_REPORT_POLL BIT(4) +#define S1G_CAP7_UNSOLICITED_DYN_AID BIT(5) +#define S1G_CAP7_SECTOR_TRAINING_OPERATION BIT(6) +#define S1G_CAP7_TEMP_PS_MODE_SWITCH BIT(7) + +#define S1G_CAP8_TWT_GROUPING BIT(0) +#define S1G_CAP8_BDT BIT(1) +#define S1G_CAP8_COLOR GENMASK(4, 2) +#define S1G_CAP8_TWT_REQUEST BIT(5) +#define S1G_CAP8_TWT_RESPOND BIT(6) +#define S1G_CAP8_PV1_FRAME BIT(7) + +#define S1G_CAP9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0) + +#define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) +#define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 -- cgit v1.2.3 From 9026118f20e28f202dab34f219bbb831ffb8c4dc Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 8 Sep 2020 21:15:20 +0800 Subject: soundwire: Add generic bandwidth allocation algorithm This algorithm computes bus parameters like clock frequency, frame shape and port transport parameters based on active stream(s) running on the bus. Developers can also implement their own .compute_params() callback for specific resource management algorithm, and set if before calling sdw_add_bus_master() Credits: this patch is based on an earlier internal contribution by Vinod Koul, Sanyog Kale, Shreyas Nc and Hardik Shah. All hard-coded values were removed from the initial contribution to use BIOS information instead. Signed-off-by: Bard Liao Acked-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20200908131520.5712-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 790823d2d33b..de9ea2ce2d35 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -964,6 +964,9 @@ struct sdw_stream_runtime { struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); void sdw_release_stream(struct sdw_stream_runtime *stream); + +int sdw_compute_params(struct sdw_bus *bus); + int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, struct sdw_port_config *port_config, -- cgit v1.2.3 From 264c03a245de7c5b1cc3836db45de6b991f877ca Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 Sep 2020 16:34:07 +0100 Subject: stacktrace: Remove reliable argument from arch_stack_walk() callback Currently the callback passed to arch_stack_walk() has an argument called reliable passed to it to indicate if the stack entry is reliable, a comment says that this is used by some printk() consumers. However in the current kernel none of the arch_stack_walk() implementations ever set this flag to true and the only callback implementation we have is in the generic stacktrace code which ignores the flag. It therefore appears that this flag is redundant so we can simplify and clarify things by removing it. Signed-off-by: Mark Brown Reviewed-by: Miroslav Benes Link: https://lore.kernel.org/r/20200914153409.25097-2-broonie@kernel.org Signed-off-by: Will Deacon --- include/linux/stacktrace.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b7af8cc13eda..50e2df30b0aa 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -29,14 +29,11 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); * stack_trace_consume_fn - Callback for arch_stack_walk() * @cookie: Caller supplied pointer handed back by arch_stack_walk() * @addr: The stack entry address to consume - * @reliable: True when the stack entry is reliable. Required by - * some printk based consumers. * * Return: True, if the entry was consumed or skipped * False, if there is no space left to store */ -typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr, - bool reliable); +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr); /** * arch_stack_walk - Architecture specific function to walk the stack * @consume_entry: Callback which is invoked by the architecture code for -- cgit v1.2.3 From 874f635310648a5adcedbd7e02ea0555cfa1da56 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 1 Sep 2020 21:55:47 +0100 Subject: cpufreq: report whether cpufreq supports Frequency Invariance (FI) Now that the update of the FI scale factor is done in cpufreq core for selected functions - target(), target_index() and fast_switch(), we can provide feedback to the task scheduler and architecture code on whether cpufreq supports FI. For this purpose provide an external function to expose whether the cpufreq drivers support FI, by using a static key. The logic behind the enablement of cpufreq-based invariance is as follows: - cpufreq-based invariance is disabled by default - cpufreq-based invariance is enabled if any of the callbacks above is implemented while the unsupported setpolicy() is not The cpufreq_supports_freq_invariance() function only returns whether cpufreq is instrumented with the arch_set_freq_scale() calls that result in support for frequency invariance. Due to the lack of knowledge on whether the implementation of arch_set_freq_scale() actually results in the setting of a scale factor based on cpufreq information, it is up to the architecture code to ensure the setting and provision of the scale factor to the scheduler. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a911e5d06845..e54767e2a68a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -217,6 +217,7 @@ void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); +bool cpufreq_supports_freq_invariance(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); @@ -237,6 +238,10 @@ static inline unsigned int cpufreq_get_hw_max_freq(unsigned int cpu) { return 0; } +static inline bool cpufreq_supports_freq_invariance(void) +{ + return false; +} static inline void disable_cpufreq(void) { } #endif -- cgit v1.2.3 From ecddc3a0d5d752071c627aa1a1d4d7b529ddae67 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 1 Sep 2020 21:55:48 +0100 Subject: arch_topology, cpufreq: constify arch_* cpumasks The passed cpumask arguments to arch_set_freq_scale() and arch_freq_counters_available() are only iterated over, so reflect this in the prototype. This also allows to pass system cpumasks like cpu_online_mask without getting a warning. Signed-off-by: Valentin Schneider Signed-off-by: Ionela Voinescu Acked-by: Catalin Marinas Acked-by: Viresh Kumar Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- include/linux/arch_topology.h | 2 +- include/linux/cpufreq.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 69b1dabe39dc..810c83336257 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -30,7 +30,7 @@ static inline unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } -bool arch_freq_counters_available(struct cpumask *cpus); +bool arch_freq_counters_available(const struct cpumask *cpus); DECLARE_PER_CPU(unsigned long, thermal_pressure); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index e54767e2a68a..9f779fbdbe7b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1011,7 +1011,8 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, extern void arch_freq_prepare_all(void); extern unsigned int arch_freq_get_on_cpu(int cpu); -extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, +extern void arch_set_freq_scale(const struct cpumask *cpus, + unsigned long cur_freq, unsigned long max_freq); /* the following are really really optional */ -- cgit v1.2.3 From 15e5d5b45b2b7072214af519357a1c0af078c50b Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 1 Sep 2020 21:55:49 +0100 Subject: arch_topology, arm, arm64: define arch_scale_freq_invariant() arch_scale_freq_invariant() is used by schedutil to determine whether the scheduler's load-tracking signals are frequency invariant. Its definition is overridable, though by default it is hardcoded to 'true' if arch_scale_freq_capacity() is defined ('false' otherwise). This behaviour is not overridden on arm, arm64 and other users of the generic arch topology driver, which is somewhat precarious: arch_scale_freq_capacity() will always be defined, yet not all cpufreq drivers are guaranteed to drive the frequency invariance scale factor setting. In other words, the load-tracking signals may very well *not* be frequency invariant. Now that cpufreq can be queried on whether the current driver is driving the Frequency Invariance (FI) scale setting, the current situation can be improved. This combines the query of whether cpufreq supports the setting of the frequency scale factor, with whether all online CPUs are counter-based FI enabled. While cpufreq FI enablement applies at system level, for all CPUs, counter-based FI support could also be used for only a subset of CPUs to set the invariance scale factor. Therefore, if cpufreq-based FI support is present, we consider the system to be invariant. If missing, we require all online CPUs to be counter-based FI enabled in order for the full system to be considered invariant. If the system ends up not being invariant, a new condition is needed in the counter initialization code that disables all scale factor setting based on counters. Precedence of counters over cpufreq use is not important here. The invariant status is only given to the system if all CPUs have at least one method of setting the frequency scale factor. Signed-off-by: Valentin Schneider Signed-off-by: Ionela Voinescu Acked-by: Catalin Marinas Acked-by: Viresh Kumar Reviewed-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- include/linux/arch_topology.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 810c83336257..083df331a3c9 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -30,6 +30,8 @@ static inline unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } +bool topology_scale_freq_invariant(void); + bool arch_freq_counters_available(const struct cpumask *cpus); DECLARE_PER_CPU(unsigned long, thermal_pressure); -- cgit v1.2.3 From 6d1349c769ea28543bdde20a658cbc93c3bc936d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 18 Sep 2020 16:45:50 -0400 Subject: [PATCH] reduce boilerplate in fsid handling Get rid of boilerplate in most of ->statfs() instances... Signed-off-by: Al Viro --- include/linux/statfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/statfs.h b/include/linux/statfs.h index fac4356ea1bf..20f695b90aab 100644 --- a/include/linux/statfs.h +++ b/include/linux/statfs.h @@ -45,4 +45,9 @@ struct kstatfs { struct dentry; extern int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid); +static inline __kernel_fsid_t u64_to_fsid(u64 v) +{ + return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}}; +} + #endif -- cgit v1.2.3 From 3753d9779038ab011e01b949253492aaa37bf57a Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Thu, 17 Sep 2020 22:08:32 -0700 Subject: net: fix build without CONFIG_SYSCTL definition Earlier commit 316cdaa1158a ("net: add option to not create fall-back tunnels in root-ns as well") removed the CONFIG_SYSCTL to enable the kernel-commandline to work. However, this variable gets defined only when CONFIG_SYSCTL option is selected. With this change the behavior would default to creating fall-back tunnels in all namespaces when CONFIG_SYSCTL is not selected and the kernel commandline option will be ignored. Fixes: 316cdaa1158a ("net: add option to not create fall-back tunnels in root-ns as well") Signed-off-by: Mahesh Bandewar Reported-by: Randy Dunlap Reported-by: kernel test robot Acked-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 909b1fbb0481..fef0eb96cf69 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -634,8 +634,9 @@ extern int sysctl_devconf_inherit_init_net; */ static inline bool net_has_fallback_tunnels(const struct net *net) { - return (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1) || - !sysctl_fb_tunnels_only_for_init_net; + return !IS_ENABLED(CONFIG_SYSCTL) || + !sysctl_fb_tunnels_only_for_init_net || + (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1); } static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) -- cgit v1.2.3 From 6d23d831e9bd0b1d2bcd9a1ecdc6ac8e6d162c36 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Fri, 18 Sep 2020 17:48:01 +0800 Subject: ptp_qoriq: support FIPER3 The FIPER3 (fixed interval period pulse generator) is supported on DPAA2 and ENETC network controller hardware. This patch is to support it in ptp_qoriq driver. Signed-off-by: Yangbo Lu Acked-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/fsl/ptp_qoriq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 884b8f8ca06d..01acebe37fab 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -136,6 +136,7 @@ struct ptp_qoriq_registers { #define DEFAULT_TMR_PRSC 2 #define DEFAULT_FIPER1_PERIOD 1000000000 #define DEFAULT_FIPER2_PERIOD 1000000000 +#define DEFAULT_FIPER3_PERIOD 1000000000 struct ptp_qoriq { void __iomem *base; @@ -147,6 +148,7 @@ struct ptp_qoriq { struct dentry *debugfs_root; struct device *dev; bool extts_fifo_support; + bool fiper3_support; int irq; int phc_index; u32 tclk_period; /* nanoseconds */ @@ -155,6 +157,7 @@ struct ptp_qoriq { u32 cksel; u32 tmr_fiper1; u32 tmr_fiper2; + u32 tmr_fiper3; u32 (*read)(unsigned __iomem *addr); void (*write)(unsigned __iomem *addr, u32 val); }; -- cgit v1.2.3 From bbed0bbdddaf46260aeb1a8910a3b32941e321a2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 21 Sep 2020 03:10:30 +0300 Subject: net: dsa: tag_8021q: add VLANs to the master interface too The whole purpose of tag_8021q is to send VLAN-tagged traffic to the CPU, from which the driver can decode the source port and switch id. Currently this only works if the VLAN filtering on the master is disabled. Change that by explicitly adding code to tag_8021q.c to add the VLANs corresponding to the tags to the filter of the master interface. Because we now need to call vlan_vid_add, then we also need to hold the RTNL mutex. Propagate that requirement to the callers of dsa_8021q_setup and modify the existing call sites as appropriate. Note that one call path, sja1105_best_effort_vlan_filtering_set -> sja1105_vlan_filtering -> sja1105_setup_8021q_tagging, was already holding this lock. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 2b003ae9fb38..88cd72dfa4e0 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -31,6 +31,8 @@ struct dsa_8021q_context { const struct dsa_8021q_ops *ops; struct dsa_switch *ds; struct list_head crosschip_links; + /* EtherType of RX VID, used for filtering on master interface */ + __be16 proto; }; #define DSA_8021Q_N_SUBVLAN 8 -- cgit v1.2.3 From 49347755a84052fd1317c3897c7cea954c8f89fe Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 16 Sep 2020 00:34:53 +0200 Subject: can: include: fix spelling mistakes This patch fixes spelling erros found by "codespell" in the include/linux/can subtree. Link: https://lore.kernel.org/r/20200915223527.1417033-4-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/core.h | 2 +- include/linux/can/dev.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index e20a0cd09ba5..7da9f1f82e8e 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -2,7 +2,7 @@ /* * linux/can/core.h * - * Protoypes and definitions for CAN protocol modules using the PF_CAN core + * Prototypes and definitions for CAN protocol modules using the PF_CAN core * * Authors: Oliver Hartkopp * Urs Thuermann diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5e3d45525bd3..516892566ac9 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -108,7 +108,7 @@ static inline bool can_skb_headroom_valid(struct net_device *dev, skb->ip_summed = CHECKSUM_UNNECESSARY; - /* preform proper loopback on capable devices */ + /* perform proper loopback on capable devices */ if (dev->flags & IFF_ECHO) skb->pkt_type = PACKET_LOOPBACK; else -- cgit v1.2.3 From 80a71815d8cd1be6481ad16fad3167f095045a06 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 16 Sep 2020 00:35:01 +0200 Subject: can: dev: can_put_echo_skb(): propagate error in case of errors The function can_put_echo_skb() can fail for several reasons. It may fail due to OOM, but when it fails it's usually due to locking problems in the driver. In order to help developing and debugging of new drivers propagate error value in case of errors. Link: https://lore.kernel.org/r/20200915223527.1417033-12-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 516892566ac9..ed0482b2f4b2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -201,8 +201,8 @@ void can_bus_off(struct net_device *dev); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); -void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, - unsigned int idx); +int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, + unsigned int idx); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); -- cgit v1.2.3 From 728fc9ff73d3f25220f6b8a52aaf063ec51ef294 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 16 Sep 2020 00:35:22 +0200 Subject: can: rx-offload: can_rx_offload_add_manual(): add new initialization function This patch adds a new initialization function: can_rx_offload_add_manual() It should be used to add support rx-offload to a driver, if the callback mechanism should not be used. Use e.g. can_rx_offload_queue_sorted() to queue skbs into rx-offload. Link: https://lore.kernel.org/r/20200915223527.1417033-33-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 1b78a0cfb615..f1b38088b765 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -35,6 +35,9 @@ int can_rx_offload_add_timestamp(struct net_device *dev, int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); +int can_rx_offload_add_manual(struct net_device *dev, + struct can_rx_offload *offload, + unsigned int weight); int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); -- cgit v1.2.3 From b1d4dc15b2f430a4f541ab6c91e63a71cf230b7d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Tue, 15 Sep 2020 09:38:18 +0800 Subject: i2c: Switch to using the new API kobj_to_dev() Switch to using the new API kobj_to_dev(). Signed-off-by: Tian Tao Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fc55ea41d323..56622658b215 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -344,7 +344,7 @@ const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { - struct device * const dev = container_of(kobj, struct device, kobj); + struct device * const dev = kobj_to_dev(kobj); return to_i2c_client(dev); } -- cgit v1.2.3 From 1138ce1cf60954d1c0e2d7b4eba5b4df5813fd86 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 18:31:30 -0700 Subject: sunrpc: fix duplicated word in Change "time time" to "time expiry_time" to match the field name. Signed-off-by: Randy Dunlap Cc: "J. Bruce Fields" Cc: Chuck Lever Cc: Trond Myklebust Cc: Anna Schumaker Cc: linux-nfs@vger.kernel.org Signed-off-by: Anna Schumaker --- include/linux/sunrpc/cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 10891b70fc7b..d0965e2997b0 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -45,7 +45,8 @@ */ struct cache_head { struct hlist_node cache_list; - time64_t expiry_time; /* After time time, don't use the data */ + time64_t expiry_time; /* After time expiry_time, don't use + * the data */ time64_t last_refresh; /* If CACHE_PENDING, this is when upcall was * sent, else this is when update was * received, though it is alway set to -- cgit v1.2.3 From 0bdd4cea12a9fd79a7eb7de8493a5fef54d0eea6 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 7 Jul 2020 21:50:12 +0200 Subject: Replace HTTP links with HTTPS ones: NFS, SUNRPC, and LOCKD clients Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Anna Schumaker --- include/linux/sunrpc/bc_xprt.h | 2 +- include/linux/sunrpc/msg_prot.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index d796058cdff2..f07c334c599f 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -4,7 +4,7 @@ NetApp provides this source code under the GPL v2 License. The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. +https://opensource.org/licenses/gpl-license.php. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index bea40d9f03a1..43f854487539 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -143,7 +143,7 @@ typedef __be32 rpc_fraghdr; /* * Well-known netids. See: * - * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml + * https://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" -- cgit v1.2.3 From 24addd848a45747bcda68418710c72fdc8e145e4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Sep 2020 08:58:39 -0700 Subject: fs: Introduce i_blocks_per_page This helper is useful for both THPs and for supporting block size larger than page size. Convert all users that I could find (we have a few different ways of writing this idiom, and I may have missed some). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Acked-by: Dave Kleikamp --- include/linux/pagemap.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7de11dcd534d..853733286138 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -899,4 +899,20 @@ static inline int page_mkwrite_check_truncate(struct page *page, return offset; } +/** + * i_blocks_per_page - How many blocks fit in this page. + * @inode: The inode which contains the blocks. + * @page: The page (head page if the page is a THP). + * + * If the block size is larger than the size of this page, return zero. + * + * Context: The caller should hold a refcount on the page to prevent it + * from being split. + * Return: The number of filesystem blocks covered by this page. + */ +static inline +unsigned int i_blocks_per_page(struct inode *inode, struct page *page) +{ + return thp_size(page) >> inode->i_blkbits; +} #endif /* _LINUX_PAGEMAP_H */ -- cgit v1.2.3 From 81ee8e52a71c712dbe04994f1430cb4c88b87ad6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Sep 2020 08:58:42 -0700 Subject: iomap: Change calling convention for zeroing Pass the full length to iomap_zero() and dax_iomap_zero(), and have them return how many bytes they actually handled. This is preparatory work for handling THP, although it looks like DAX could actually take advantage of it if there's a larger contiguous area. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- include/linux/dax.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 6904d4e0b2e0..951a851a0481 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -214,8 +214,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, - struct iomap *iomap); +s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); -- cgit v1.2.3 From c754e137f55e075d6b6ad9b866c32e9aad260a83 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2020 15:29:59 -0400 Subject: pNFS/flexfiles: Be consistent about mirror index types A mirror index is always of type u32. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9408f3252c8e..69cb46f7b8d2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1611,8 +1611,8 @@ struct nfs_pgio_header { __u64 mds_offset; /* Filelayout dense stripe */ struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ - int ds_commit_idx; /* ds index if ds_clp is set */ - int pgio_mirror_idx;/* mirror index in pgio layer */ + u32 ds_commit_idx; /* ds index if ds_clp is set */ + u32 pgio_mirror_idx;/* mirror index in pgio layer */ }; struct nfs_mds_commit_info { -- cgit v1.2.3 From 4c9db39361da5dcf0e77f4aeb4817be3bf7d626b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 19 Sep 2020 23:28:25 +0200 Subject: regulator: unexport regulator_lock/unlock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regulator_lock/unlock() was used only to guard regulator_notifier_call_chain(). As no users remain, make the functions internal. Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/d3381aabd2632aff5e7b839d55868bec6e85c811.1600550732.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 8539f34ae42b..11cade73726c 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -533,9 +533,6 @@ int regulator_set_current_limit_regmap(struct regulator_dev *rdev, int regulator_get_current_limit_regmap(struct regulator_dev *rdev); void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); -void regulator_lock(struct regulator_dev *rdev); -void regulator_unlock(struct regulator_dev *rdev); - /* * Helper functions intended to be used by regulator drivers prior registering * their regulators. -- cgit v1.2.3 From cd7798cbd28044a3026619e36993160ba8fa118d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Sep 2020 14:21:14 +0100 Subject: iio: Add __printf() attributes to various allocation functions A partial set of these was added to IIO a long time back. This fills in some gaps in coverage highlighted by building with W=1 Signed-off-by: Jonathan Cameron Reviewed-by: Alexandru Ardelean Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200913132115.800131-3-jic23@kernel.org --- include/linux/iio/iio.h | 3 ++- include/linux/iio/trigger_consumer.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index e2df67a3b9ab..2e45b3ceafa7 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -691,8 +691,9 @@ static inline void *iio_priv(const struct iio_dev *indio_dev) void iio_device_free(struct iio_dev *indio_dev); struct iio_dev *devm_iio_device_alloc(struct device *parent, int sizeof_priv); +__printf(2, 3) struct iio_trigger *devm_iio_trigger_alloc(struct device *dev, - const char *fmt, ...); + const char *fmt, ...); /** * iio_buffer_enabled() - helper function to test if the buffer is enabled * @indio_dev: IIO device structure for device diff --git a/include/linux/iio/trigger_consumer.h b/include/linux/iio/trigger_consumer.h index 3aa2f132dd67..2c05dfad88d7 100644 --- a/include/linux/iio/trigger_consumer.h +++ b/include/linux/iio/trigger_consumer.h @@ -38,7 +38,7 @@ struct iio_poll_func { }; -struct iio_poll_func +__printf(5, 6) struct iio_poll_func *iio_alloc_pollfunc(irqreturn_t (*h)(int irq, void *p), irqreturn_t (*thread)(int irq, void *p), int type, -- cgit v1.2.3 From 36e322ec5dd24f1d0840061ffe406458669bccf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Thu, 17 Sep 2020 17:52:20 +0200 Subject: iio: adis: Move burst mode into adis_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add burst mode variables in the per device specific data structure. As some drivers support multiple devices with different burst sizes it makes sense this data to be in `adis_data`. While moving the variables, there are two main differences: 1. The `en`variable is dropped. If a device supports burst mode, it will just use it as it will has better performance for almost all real use cases. 2. Replace `extra_len` by `burst_len`. Users should now explicitly define the length of the burst buffer as it is typically constant. This also allows to remove the following line from the library: ``` /* All but the timestamp channel */ burst_length = (indio_dev->num_channels - 1) * sizeof(u16); ``` The library should not assume that a timestamp channel is defined. Moreover, most parts also include some diagnostic data, crc, etc.. in the burst buffer that needed to be included in an `extra_len` variable which is not that nice. On top of this, some devices already start to have some 32bit size channels ... This patch is also a move to completely drop the `struct adis_burst` from the library. Signed-off-by: Nuno Sá Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20200917155223.218500-2-nuno.sa@analog.com --- include/linux/iio/imu/adis.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 01ba691da2f3..c502ea3b9199 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -51,6 +51,11 @@ struct adis_timeout { * @timeouts: Chip specific delays * @enable_irq: Hook for ADIS devices that have a special IRQ enable/disable * @has_paging: True if ADIS device has paged registers + * @burst_reg_cmd: Register command that triggers burst + * @burst_len: Burst size in the SPI RX buffer. If @burst_max_len is defined, + * this should be the minimum size supported by the device. + * @burst_max_len: Holds the maximum burst size when the device supports + * more than one burst mode with different sizes */ struct adis_data { unsigned int read_delay; @@ -75,6 +80,10 @@ struct adis_data { int (*enable_irq)(struct adis *adis, bool enable); bool has_paging; + + unsigned int burst_reg_cmd; + unsigned int burst_len; + unsigned int burst_max_len; }; /** -- cgit v1.2.3 From e8173161746d1d8a8b9dfd8968cf695efaa90d09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Thu, 17 Sep 2020 17:52:23 +0200 Subject: iio: adis. Drop adis_burst struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As there are no users anymore of this structure, it can be safely removed. Signed-off-by: Nuno Sá Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20200917155223.218500-5-nuno.sa@analog.com --- include/linux/iio/imu/adis.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index c502ea3b9199..04e96d688ba9 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -20,7 +20,6 @@ #define ADIS_REG_PAGE_ID 0x00 struct adis; -struct adis_burst; /** * struct adis_timeouts - ADIS chip variant timeouts @@ -108,7 +107,6 @@ struct adis { struct iio_trigger *trig; const struct adis_data *data; - struct adis_burst *burst; unsigned int burst_extra_len; /** * The state_lock is meant to be used during operations that require @@ -508,21 +506,6 @@ int adis_single_conversion(struct iio_dev *indio_dev, #ifdef CONFIG_IIO_ADIS_LIB_BUFFER -/** - * struct adis_burst - ADIS data for burst transfers - * @en burst mode enabled - * @reg_cmd register command that triggers burst - * @extra_len extra length to account in the SPI RX buffer - * @burst_max_len holds the maximum burst size when the device supports - * more than one burst mode with different sizes - */ -struct adis_burst { - bool en; - unsigned int reg_cmd; - const u32 extra_len; - const u32 burst_max_len; -}; - int devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, irq_handler_t trigger_handler); -- cgit v1.2.3 From 12856e7acde4702b7c3238c15fcba86ff6aa507f Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Thu, 10 Sep 2020 10:59:55 -0400 Subject: PCI/IOV: Mark VFs as not implementing PCI_COMMAND_MEMORY For VFs, the Memory Space Enable bit in the Command Register is hard-wired to 0. Add a new bit to signify devices where the Command Register Memory Space Enable bit does not control the device's response to MMIO accesses. Fixes: abafbc551fdd ("vfio-pci: Invalidate mmaps and block MMIO access on disabled memory") Signed-off-by: Matthew Rosato Acked-by: Bjorn Helgaas Signed-off-by: Alex Williamson --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 835530605c0d..3ff723124ca7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -445,6 +445,7 @@ struct pci_dev { unsigned int is_probed:1; /* Device probing in progress */ unsigned int link_active_reporting:1;/* Device capable of reporting link active */ unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */ + unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ -- cgit v1.2.3 From 2af30f115d6957f372ce3096c7198763ff253d97 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 21 Sep 2020 13:12:17 +0100 Subject: btf: Make btf_set_contains take a const pointer bsearch doesn't modify the contents of the array, so we can take a const pointer. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200921121227.255763-2-lmb@cloudflare.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d7c5a6ed87e3..0478b20d335b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1905,6 +1905,6 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); struct btf_id_set; -bool btf_id_set_contains(struct btf_id_set *set, u32 id); +bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 27774b7073b5d520c80f1fcb8e9993fc139f21bd Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 21 Sep 2020 13:12:19 +0100 Subject: btf: Add BTF_ID_LIST_SINGLE macro Add a convenience macro that allows defining a BTF ID list with a single item. This lets us cut down on repetitive macros. Suggested-by: Andrii Nakryiko Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200921121227.255763-4-lmb@cloudflare.com --- include/linux/btf_ids.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 210b086188a3..57890b357f85 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -76,6 +76,13 @@ extern u32 name[]; #define BTF_ID_LIST_GLOBAL(name) \ __BTF_ID_LIST(name, globl) +/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with + * a single entry. + */ +#define BTF_ID_LIST_SINGLE(name, prefix, typename) \ + BTF_ID_LIST(name) \ + BTF_ID(prefix, typename) + /* * The BTF_ID_UNUSED macro defines 4 zero bytes. * It's used when we want to define 'unused' entry @@ -140,6 +147,7 @@ extern struct btf_id_set name; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; #define BTF_SET_START(name) static struct btf_id_set name = { 0 }; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; #define BTF_SET_END(name) -- cgit v1.2.3 From 9436ef6e862b9ca22e5b12f87b106e07d5af4cae Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 21 Sep 2020 13:12:20 +0100 Subject: bpf: Allow specifying a BTF ID per argument in function protos Function prototypes using ARG_PTR_TO_BTF_ID currently use two ways to signal which BTF IDs are acceptable. First, bpf_func_proto.btf_id is an array of IDs, one for each argument. This array is only accessed up to the highest numbered argument that uses ARG_PTR_TO_BTF_ID and may therefore be less than five arguments long. It usually points at a BTF_ID_LIST. Second, check_btf_id is a function pointer that is called by the verifier if present. It gets the actual BTF ID of the register, and the argument number we're currently checking. It turns out that the only user check_arg_btf_id ignores the argument, and is simply used to check whether the BTF ID has a struct sock_common at it's start. Replace both of these mechanisms with an explicit BTF ID for each argument in a function proto. Thanks to btf_struct_ids_match this is very flexible: check_arg_btf_id can be replaced by requiring struct sock_common. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200921121227.255763-5-lmb@cloudflare.com --- include/linux/bpf.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0478b20d335b..87b0d5dcc1ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -326,12 +326,16 @@ struct bpf_func_proto { }; enum bpf_arg_type arg_type[5]; }; - int *btf_id; /* BTF ids of arguments */ - bool (*check_btf_id)(u32 btf_id, u32 arg); /* if the argument btf_id is - * valid. Often used if more - * than one btf id is permitted - * for this argument. - */ + union { + struct { + u32 *arg1_btf_id; + u32 *arg2_btf_id; + u32 *arg3_btf_id; + u32 *arg4_btf_id; + u32 *arg5_btf_id; + }; + u32 *arg_btf_id[5]; + }; int *ret_btf_id; /* return value btf_id */ bool (*allowed)(const struct bpf_prog *prog); }; @@ -1385,8 +1389,6 @@ int btf_struct_access(struct bpf_verifier_log *log, u32 *next_btf_id); bool btf_struct_ids_match(struct bpf_verifier_log *log, int off, u32 id, u32 need_type_id); -int btf_resolve_helper_id(struct bpf_verifier_log *log, - const struct bpf_func_proto *fn, int); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, -- cgit v1.2.3 From f79e7ea571732a6e16f15c6e2f000c347e2d7431 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 21 Sep 2020 13:12:27 +0100 Subject: bpf: Use a table to drive helper arg type checks The mapping between bpf_arg_type and bpf_reg_type is encoded in a big hairy if statement that is hard to follow. The debug output also leaves to be desired: if a reg_type doesn't match we only print one of the options, instead printing all the valid ones. Convert the if statement into a table which is then used to drive type checking. If none of the reg_types match we print all options, e.g.: R2 type=rdonly_buf expected=fp, pkt, pkt_meta, map_value Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200921121227.255763-12-lmb@cloudflare.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 87b0d5dcc1ff..fc5c901c7542 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -292,6 +292,7 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ + __BPF_ARG_TYPE_MAX, }; /* type of values returned from helper functions */ -- cgit v1.2.3 From 92ec804f3dbf0d986f8e10850bfff14f316d7aaf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 21 Sep 2020 15:10:53 -0700 Subject: net: phy: bcm7xxx: Add an entry for BCM72113 BCM72113 features a 28nm integrated EPHY, add an entry to the driver for it. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6ad4c000661a..d0bd226d6bd9 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -30,6 +30,7 @@ #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM89610 0x03625cd0 +#define PHY_ID_BCM72113 0x35905310 #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7255 0xae025120 #define PHY_ID_BCM7260 0xae025190 -- cgit v1.2.3 From 74caba7f2a0685575b3ee5330a118f5922485e02 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 21 Sep 2020 13:24:45 +0206 Subject: printk: move dictionary keys to dev_printk_info Dictionaries are only used for SUBSYSTEM and DEVICE properties. The current implementation stores the property names each time they are used. This requires more space than otherwise necessary. Also, because the dictionary entries are currently considered optional, it cannot be relied upon that they are always available, even if the writer wanted to store them. These issues will increase should new dictionary properties be introduced. Rather than storing the subsystem and device properties in the dict ring, introduce a struct dev_printk_info with separate fields to store only the property values. Embed this struct within the struct printk_info to provide guaranteed availability. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/87mu1jl6ne.fsf@jogness.linutronix.de --- include/linux/dev_printk.h | 8 ++++++++ include/linux/printk.h | 6 ++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index 3028b644b4fb..6f009559ee54 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -21,6 +21,14 @@ struct device; +#define PRINTK_INFO_SUBSYSTEM_LEN 16 +#define PRINTK_INFO_DEVICE_LEN 48 + +struct dev_printk_info { + char subsystem[PRINTK_INFO_SUBSYSTEM_LEN]; + char device[PRINTK_INFO_DEVICE_LEN]; +}; + #ifdef CONFIG_PRINTK __printf(3, 0) __cold diff --git a/include/linux/printk.h b/include/linux/printk.h index fc8f03c54543..071500ee7281 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -158,10 +158,12 @@ static inline void printk_nmi_direct_enter(void) { } static inline void printk_nmi_direct_exit(void) { } #endif /* PRINTK_NMI */ +struct dev_printk_info; + #ifdef CONFIG_PRINTK -asmlinkage __printf(5, 0) +asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, - const char *dict, size_t dictlen, + const struct dev_printk_info *dev_info, const char *fmt, va_list args); asmlinkage __printf(1, 0) -- cgit v1.2.3 From 15b760c37ad3c3f2b922506eaca4ca8b4292e621 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Mon, 21 Sep 2020 15:17:15 +0300 Subject: nitro_enclaves: Add ioctl interface definition The Nitro Enclaves driver handles the enclave lifetime management. This includes enclave creation, termination and setting up its resources such as memory and CPU. An enclave runs alongside the VM that spawned it. It is abstracted as a process running in the VM that launched it. The process interacts with the NE driver, that exposes an ioctl interface for creating an enclave and setting up its resources. Changelog v9 -> v10 * Update commit message to include the changelog before the SoB tag(s). v8 -> v9 * No changes. v7 -> v8 * Add NE custom error codes for user space memory regions not backed by pages multiple of 2 MiB, invalid flags and enclave CID. * Add max flag value for enclave image load info. v6 -> v7 * Clarify in the ioctls documentation that the return value is -1 and errno is set on failure. * Update the error code value for NE_ERR_INVALID_MEM_REGION_SIZE as it gets in user space as value 25 (ENOTTY) instead of 515. Update the NE custom error codes values range to not be the same as the ones defined in include/linux/errno.h, although these are not propagated to user space. v5 -> v6 * Fix typo in the description about the NE CPU pool. * Update documentation to kernel-doc format. * Remove the ioctl to query API version. v4 -> v5 * Add more details about the ioctl calls usage e.g. error codes, file descriptors used. * Update the ioctl to set an enclave vCPU to not return a file descriptor. * Add specific NE error codes. v3 -> v4 * Decouple NE ioctl interface from KVM API. * Add NE API version and the corresponding ioctl call. * Add enclave / image load flags options. v2 -> v3 * Remove the GPL additional wording as SPDX-License-Identifier is already in place. v1 -> v2 * Add ioctl for getting enclave image load metadata. * Update NE_ENCLAVE_START ioctl name to NE_START_ENCLAVE. * Add entry in Documentation/userspace-api/ioctl/ioctl-number.rst for NE ioctls. * Update NE ioctls definition based on the updated ioctl range for major and minor. Reviewed-by: Alexander Graf Reviewed-by: Stefan Hajnoczi Signed-off-by: Alexandru Vasile Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20200921121732.44291-2-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- include/linux/nitro_enclaves.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 include/linux/nitro_enclaves.h (limited to 'include/linux') diff --git a/include/linux/nitro_enclaves.h b/include/linux/nitro_enclaves.h new file mode 100644 index 000000000000..d91ef2bfdf47 --- /dev/null +++ b/include/linux/nitro_enclaves.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + */ + +#ifndef _LINUX_NITRO_ENCLAVES_H_ +#define _LINUX_NITRO_ENCLAVES_H_ + +#include + +#endif /* _LINUX_NITRO_ENCLAVES_H_ */ -- cgit v1.2.3 From a992b20cd4ee360dbbe6f69339cb07146e4304d6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Sep 2020 21:11:24 -0700 Subject: fscrypt: add fscrypt_prepare_new_inode() and fscrypt_set_context() fscrypt_get_encryption_info() is intended to be GFP_NOFS-safe. But actually it isn't, since it uses functions like crypto_alloc_skcipher() which aren't GFP_NOFS-safe, even when called under memalloc_nofs_save(). Therefore it can deadlock when called from a context that needs GFP_NOFS, e.g. during an ext4 transaction or between f2fs_lock_op() and f2fs_unlock_op(). This happens when creating a new encrypted file. We can't fix this by just not setting up the key for new inodes right away, since new symlinks need their key to encrypt the symlink target. So we need to set up the new inode's key before starting the transaction. But just calling fscrypt_get_encryption_info() earlier doesn't work, since it assumes the encryption context is already set, and the encryption context can't be set until the transaction. The recently proposed fscrypt support for the ceph filesystem (https://lkml.kernel.org/linux-fscrypt/20200821182813.52570-1-jlayton@kernel.org/T/#u) will have this same ordering problem too, since ceph will need to encrypt new symlinks before setting their encryption context. Finally, f2fs can deadlock when the filesystem is mounted with '-o test_dummy_encryption' and a new file is created in an existing unencrypted directory. Similarly, this is caused by holding too many locks when calling fscrypt_get_encryption_info(). To solve all these problems, add new helper functions: - fscrypt_prepare_new_inode() sets up a new inode's encryption key (fscrypt_info), using the parent directory's encryption policy and a new random nonce. It neither reads nor writes the encryption context. - fscrypt_set_context() persists the encryption context of a new inode, using the information from the fscrypt_info already in memory. This replaces fscrypt_inherit_context(). Temporarily keep fscrypt_inherit_context() around until all filesystems have been converted to use fscrypt_set_context(). Acked-by: Jeff Layton Link: https://lore.kernel.org/r/20200917041136.178600-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index eaf16eb55788..9cf7ca90f3ab 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -158,6 +158,7 @@ int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); int fscrypt_has_permitted_context(struct inode *parent, struct inode *child); int fscrypt_inherit_context(struct inode *parent, struct inode *child, void *fs_data, bool preload); +int fscrypt_set_context(struct inode *inode, void *fs_data); struct fscrypt_dummy_context { const union fscrypt_context *ctx; @@ -184,6 +185,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); /* keysetup.c */ int fscrypt_get_encryption_info(struct inode *inode); +int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, + bool *encrypt_ret); void fscrypt_put_encryption_info(struct inode *inode); void fscrypt_free_inode(struct inode *inode); int fscrypt_drop_inode(struct inode *inode); @@ -347,6 +350,11 @@ static inline int fscrypt_inherit_context(struct inode *parent, return -EOPNOTSUPP; } +static inline int fscrypt_set_context(struct inode *inode, void *fs_data) +{ + return -EOPNOTSUPP; +} + struct fscrypt_dummy_context { }; @@ -394,6 +402,15 @@ static inline int fscrypt_get_encryption_info(struct inode *inode) return -EOPNOTSUPP; } +static inline int fscrypt_prepare_new_inode(struct inode *dir, + struct inode *inode, + bool *encrypt_ret) +{ + if (IS_ENCRYPTED(dir)) + return -EOPNOTSUPP; + return 0; +} + static inline void fscrypt_put_encryption_info(struct inode *inode) { return; -- cgit v1.2.3 From e9d5e31d2fe39825b0fc276b14f2a322faf3c77b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Sep 2020 21:11:30 -0700 Subject: fscrypt: remove fscrypt_inherit_context() Now that all filesystems have been converted to use fscrypt_prepare_new_inode() and fscrypt_set_context(), fscrypt_inherit_context() is no longer used. Remove it. Acked-by: Jeff Layton Link: https://lore.kernel.org/r/20200917041136.178600-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 9cf7ca90f3ab..81d6ded24328 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -156,8 +156,6 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg); int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg); int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); int fscrypt_has_permitted_context(struct inode *parent, struct inode *child); -int fscrypt_inherit_context(struct inode *parent, struct inode *child, - void *fs_data, bool preload); int fscrypt_set_context(struct inode *inode, void *fs_data); struct fscrypt_dummy_context { @@ -343,13 +341,6 @@ static inline int fscrypt_has_permitted_context(struct inode *parent, return 0; } -static inline int fscrypt_inherit_context(struct inode *parent, - struct inode *child, - void *fs_data, bool preload) -{ - return -EOPNOTSUPP; -} - static inline int fscrypt_set_context(struct inode *inode, void *fs_data) { return -EOPNOTSUPP; -- cgit v1.2.3 From 31114726b69364f3bf8dd945c600ceed4c430f4f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Sep 2020 21:11:34 -0700 Subject: fscrypt: move fscrypt_prepare_symlink() out-of-line In preparation for moving the logic for "get the encryption policy inherited by new files in this directory" to a single place, make fscrypt_prepare_symlink() a regular function rather than an inline function that wraps __fscrypt_prepare_symlink(). This way, the new function fscrypt_policy_to_inherit() won't need to be exported to filesystems. Acked-by: Jeff Layton Link: https://lore.kernel.org/r/20200917041136.178600-12-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 63 ++++++++++++------------------------------------- 1 file changed, 15 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 81d6ded24328..39e7397a3f10 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -225,9 +225,9 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); -int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, - unsigned int max_len, - struct fscrypt_str *disk_link); +int fscrypt_prepare_symlink(struct inode *dir, const char *target, + unsigned int len, unsigned int max_len, + struct fscrypt_str *disk_link); int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link); const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, @@ -520,15 +520,21 @@ static inline int fscrypt_prepare_setflags(struct inode *inode, return 0; } -static inline int __fscrypt_prepare_symlink(struct inode *dir, - unsigned int len, - unsigned int max_len, - struct fscrypt_str *disk_link) +static inline int fscrypt_prepare_symlink(struct inode *dir, + const char *target, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) { - return -EOPNOTSUPP; + if (IS_ENCRYPTED(dir)) + return -EOPNOTSUPP; + disk_link->name = (unsigned char *)target; + disk_link->len = len + 1; + if (disk_link->len > max_len) + return -ENAMETOOLONG; + return 0; } - static inline int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, @@ -793,45 +799,6 @@ static inline int fscrypt_prepare_setattr(struct dentry *dentry, return 0; } -/** - * fscrypt_prepare_symlink() - prepare to create a possibly-encrypted symlink - * @dir: directory in which the symlink is being created - * @target: plaintext symlink target - * @len: length of @target excluding null terminator - * @max_len: space the filesystem has available to store the symlink target - * @disk_link: (out) the on-disk symlink target being prepared - * - * This function computes the size the symlink target will require on-disk, - * stores it in @disk_link->len, and validates it against @max_len. An - * encrypted symlink may be longer than the original. - * - * Additionally, @disk_link->name is set to @target if the symlink will be - * unencrypted, but left NULL if the symlink will be encrypted. For encrypted - * symlinks, the filesystem must call fscrypt_encrypt_symlink() to create the - * on-disk target later. (The reason for the two-step process is that some - * filesystems need to know the size of the symlink target before creating the - * inode, e.g. to determine whether it will be a "fast" or "slow" symlink.) - * - * Return: 0 on success, -ENAMETOOLONG if the symlink target is too long, - * -ENOKEY if the encryption key is missing, or another -errno code if a problem - * occurred while setting up the encryption key. - */ -static inline int fscrypt_prepare_symlink(struct inode *dir, - const char *target, - unsigned int len, - unsigned int max_len, - struct fscrypt_str *disk_link) -{ - if (IS_ENCRYPTED(dir) || fscrypt_get_dummy_context(dir->i_sb) != NULL) - return __fscrypt_prepare_symlink(dir, len, max_len, disk_link); - - disk_link->name = (unsigned char *)target; - disk_link->len = len + 1; - if (disk_link->len > max_len) - return -ENAMETOOLONG; - return 0; -} - /** * fscrypt_encrypt_symlink() - encrypt the symlink target if needed * @inode: symlink inode -- cgit v1.2.3 From ac4acb1f4b2b6b7e8d913537cccec8789903e164 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Sep 2020 21:11:35 -0700 Subject: fscrypt: handle test_dummy_encryption in more logical way The behavior of the test_dummy_encryption mount option is that when a new file (or directory or symlink) is created in an unencrypted directory, it's automatically encrypted using a dummy encryption policy. That's it; in particular, the encryption (or lack thereof) of existing files (or directories or symlinks) doesn't change. Unfortunately the implementation of test_dummy_encryption is a bit weird and confusing. When test_dummy_encryption is enabled and a file is being created in an unencrypted directory, we set up an encryption key (->i_crypt_info) for the directory. This isn't actually used to do any encryption, however, since the directory is still unencrypted! Instead, ->i_crypt_info is only used for inheriting the encryption policy. One consequence of this is that the filesystem ends up providing a "dummy context" (policy + nonce) instead of a "dummy policy". In commit ed318a6cc0b6 ("fscrypt: support test_dummy_encryption=v2"), I mistakenly thought this was required. However, actually the nonce only ends up being used to derive a key that is never used. Another consequence of this implementation is that it allows for 'inode->i_crypt_info != NULL && !IS_ENCRYPTED(inode)', which is an edge case that can be forgotten about. For example, currently FS_IOC_GET_ENCRYPTION_POLICY on an unencrypted directory may return the dummy encryption policy when the filesystem is mounted with test_dummy_encryption. That seems like the wrong thing to do, since again, the directory itself is not actually encrypted. Therefore, switch to a more logical and maintainable implementation where the dummy encryption policy inheritance is done without setting up keys for unencrypted directories. This involves: - Adding a function fscrypt_policy_to_inherit() which returns the encryption policy to inherit from a directory. This can be a real policy, a dummy policy, or no policy. - Replacing struct fscrypt_dummy_context, ->get_dummy_context(), etc. with struct fscrypt_dummy_policy, ->get_dummy_policy(), etc. - Making fscrypt_fname_encrypted_size() take an fscrypt_policy instead of an inode. Acked-by: Jaegeuk Kim Acked-by: Jeff Layton Link: https://lore.kernel.org/r/20200917041136.178600-13-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 39e7397a3f10..b3b0c5675c6b 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -21,7 +21,7 @@ #define FS_CRYPTO_BLOCK_SIZE 16 -union fscrypt_context; +union fscrypt_policy; struct fscrypt_info; struct seq_file; @@ -62,8 +62,7 @@ struct fscrypt_operations { int (*get_context)(struct inode *inode, void *ctx, size_t len); int (*set_context)(struct inode *inode, const void *ctx, size_t len, void *fs_data); - const union fscrypt_context *(*get_dummy_context)( - struct super_block *sb); + const union fscrypt_policy *(*get_dummy_policy)(struct super_block *sb); bool (*empty_dir)(struct inode *inode); unsigned int max_namelen; bool (*has_stable_inodes)(struct super_block *sb); @@ -101,14 +100,6 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); } -static inline const union fscrypt_context * -fscrypt_get_dummy_context(struct super_block *sb) -{ - if (!sb->s_cop->get_dummy_context) - return NULL; - return sb->s_cop->get_dummy_context(sb); -} - /* * When d_splice_alias() moves a directory's encrypted alias to its decrypted * alias as a result of the encryption key being added, DCACHE_ENCRYPTED_NAME @@ -158,20 +149,21 @@ int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); int fscrypt_has_permitted_context(struct inode *parent, struct inode *child); int fscrypt_set_context(struct inode *inode, void *fs_data); -struct fscrypt_dummy_context { - const union fscrypt_context *ctx; +struct fscrypt_dummy_policy { + const union fscrypt_policy *policy; }; -int fscrypt_set_test_dummy_encryption(struct super_block *sb, - const substring_t *arg, - struct fscrypt_dummy_context *dummy_ctx); +int fscrypt_set_test_dummy_encryption( + struct super_block *sb, + const substring_t *arg, + struct fscrypt_dummy_policy *dummy_policy); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); static inline void -fscrypt_free_dummy_context(struct fscrypt_dummy_context *dummy_ctx) +fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { - kfree(dummy_ctx->ctx); - dummy_ctx->ctx = NULL; + kfree(dummy_policy->policy); + dummy_policy->policy = NULL; } /* keyring.c */ @@ -250,12 +242,6 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) return false; } -static inline const union fscrypt_context * -fscrypt_get_dummy_context(struct super_block *sb) -{ - return NULL; -} - static inline void fscrypt_handle_d_move(struct dentry *dentry) { } @@ -346,7 +332,7 @@ static inline int fscrypt_set_context(struct inode *inode, void *fs_data) return -EOPNOTSUPP; } -struct fscrypt_dummy_context { +struct fscrypt_dummy_policy { }; static inline void fscrypt_show_test_dummy_encryption(struct seq_file *seq, @@ -356,7 +342,7 @@ static inline void fscrypt_show_test_dummy_encryption(struct seq_file *seq, } static inline void -fscrypt_free_dummy_context(struct fscrypt_dummy_context *dummy_ctx) +fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { } -- cgit v1.2.3 From c8c868abc91ff23f6f5c4444c419de7c277d77e1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Sep 2020 21:11:36 -0700 Subject: fscrypt: make fscrypt_set_test_dummy_encryption() take a 'const char *' fscrypt_set_test_dummy_encryption() requires that the optional argument to the test_dummy_encryption mount option be specified as a substring_t. That doesn't work well with filesystems that use the new mount API, since the new way of parsing mount options doesn't use substring_t. Make it take the argument as a 'const char *' instead. Instead of moving the match_strdup() into the callers in ext4 and f2fs, make them just use arg->from directly. Since the pattern is "test_dummy_encryption=%s", the argument will be null-terminated. Acked-by: Jeff Layton Link: https://lore.kernel.org/r/20200917041136.178600-14-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b3b0c5675c6b..fc67c4cbaa96 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -15,7 +15,6 @@ #include #include -#include #include #include @@ -153,9 +152,7 @@ struct fscrypt_dummy_policy { const union fscrypt_policy *policy; }; -int fscrypt_set_test_dummy_encryption( - struct super_block *sb, - const substring_t *arg, +int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg, struct fscrypt_dummy_policy *dummy_policy); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); -- cgit v1.2.3 From 10942019040c5557556ec22aae0f771b2a1a1a6d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Sep 2020 16:28:58 +0200 Subject: firmware: psci: Extend psci_set_osi_mode() to allow reset to PC mode The current user (cpuidle-psci) of psci_set_osi_mode() only needs to enable the PSCI OSI mode. Although, as subsequent changes shows, there is a need to be able to reset back into the PSCI PC mode. Therefore, let's extend psci_set_osi_mode() to take a bool as in-parameter, to let the user indicate whether to enable OSI or to switch back to PC mode. Reviewed-by: Sudeep Holla Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/psci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index 14ad9b9ebcd6..2a1bfb890e58 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -18,7 +18,7 @@ bool psci_tos_resident_on(int cpu); int psci_cpu_suspend_enter(u32 state); bool psci_power_state_is_valid(u32 state); -int psci_set_osi_mode(void); +int psci_set_osi_mode(bool enable); bool psci_has_osi_support(void); struct psci_operations { -- cgit v1.2.3 From 900ffe39fec908e0aa26a30612e43ebc7140db79 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 19 Sep 2020 01:09:36 -0700 Subject: x86/entry: Fix typo in comments for syscall_enter_from_user_mode() Just to help myself and others with finding the correct function names, fix a typo for "usermode" vs "user_mode". Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200919080936.259819-1-keescook@chromium.org --- include/linux/entry-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..3b6754d5a604 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -38,7 +38,7 @@ #endif /* - * TIF flags handled in syscall_enter_from_usermode() + * TIF flags handled in syscall_enter_from_user_mode() */ #ifndef ARCH_SYSCALL_ENTER_WORK # define ARCH_SYSCALL_ENTER_WORK (0) -- cgit v1.2.3 From 028abd9222df0cf5855dab5014a5ebaf06f90565 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2020 10:22:34 +0200 Subject: fs: remove compat_sys_mount compat_sys_mount is identical to the regular sys_mount now, so remove it and use the native version everywhere. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index d38c4d7e83bd..100632280ccc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -522,12 +522,6 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, compat_ulong_t arg); -/* fs/namespace.c */ -asmlinkage long compat_sys_mount(const char __user *dev_name, - const char __user *dir_name, - const char __user *type, compat_ulong_t flags, - const void __user *data); - /* fs/open.c */ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf); -- cgit v1.2.3 From dd87a72ae968f70852cd7a2d939acd9693e76095 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 21 Sep 2020 03:32:05 +0800 Subject: soundwire: enable Data Port test modes Test modes are required for all SoundWire IP, and help debug integration issues. In theory each port can be configured with a different mode but to simplify this patch only offers separate configurations for the Master and Slave ports - this covers 99% of the intended cases during platform integration. The test mode value is set via platform-specific ways. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20200920193207.31241-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index de9ea2ce2d35..41cc1192f9aa 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -554,6 +554,10 @@ enum sdw_port_prep_ops { * @bandwidth: Current bandwidth * @col: Active columns * @row: Active rows + * @s_data_mode: NORMAL, STATIC or PRBS mode for all Slave ports + * @m_data_mode: NORMAL, STATIC or PRBS mode for all Master ports. The value + * should be the same to detect transmission issues, but can be different to + * test the interrupt reports */ struct sdw_bus_params { enum sdw_reg_bank curr_bank; @@ -563,6 +567,8 @@ struct sdw_bus_params { unsigned int bandwidth; unsigned int col; unsigned int row; + int s_data_mode; + int m_data_mode; }; /** -- cgit v1.2.3 From f49735f4978f479b0de4f50ab217d5a56bc83c55 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Tue, 22 Sep 2020 12:34:16 -0600 Subject: cpuidle: record state entry rejection statistics CPUs may fail to enter the chosen idle state if there was a pending interrupt, causing the cpuidle driver to return an error value. Record that and export it via sysfs along with the other idle state statistics. This could prove useful in understanding behavior of the governor and the system during usecases that involve multiple CPUs. Signed-off-by: Lina Iyer [ rjw: Changelog and documentation edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 6175c77bf25e..ed0da0e58e8b 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -38,6 +38,7 @@ struct cpuidle_state_usage { u64 time_ns; unsigned long long above; /* Number of times it's been too deep */ unsigned long long below; /* Number of times it's been too shallow */ + unsigned long long rejected; /* Number of times idle entry was rejected */ #ifdef CONFIG_SUSPEND unsigned long long s2idle_usage; unsigned long long s2idle_time; /* in US */ -- cgit v1.2.3 From 07d098e6bbad04030dab5b3e64149601fcb063ce Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 21 Sep 2020 22:32:49 -0400 Subject: block: allow 'chunk_sectors' to be non-power-of-2 It is possible, albeit more unlikely, for a block device to have a non power-of-2 for chunk_sectors (e.g. 10+2 RAID6 with 128K chunk_sectors, which results in a full-stripe size of 1280K. This causes the RAID6's io_opt to be advertised as 1280K, and a stacked device _could_ then be made to use a blocksize, aka chunk_sectors, that matches non power-of-2 io_opt of underlying RAID6 -- resulting in stacked device's chunk_sectors being a non power-of-2). Update blk_queue_chunk_sectors() and blk_max_size_offset() to accommodate drivers that need a non power-of-2 chunk_sectors. Reviewed-by: Ming Lei Reviewed-by: Martin K. Petersen Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5bd96fbab9b4..6e19a7aa1672 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1063,11 +1063,17 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, static inline unsigned int blk_max_size_offset(struct request_queue *q, sector_t offset) { - if (!q->limits.chunk_sectors) + unsigned int chunk_sectors = q->limits.chunk_sectors; + + if (!chunk_sectors) return q->limits.max_sectors; - return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors - - (offset & (q->limits.chunk_sectors - 1)))); + if (likely(is_power_of_2(chunk_sectors))) + chunk_sectors -= offset & (chunk_sectors - 1); + else + chunk_sectors -= sector_div(offset, chunk_sectors); + + return min(q->limits.max_sectors, chunk_sectors); } static inline unsigned int blk_rq_get_max_sectors(struct request *rq, -- cgit v1.2.3 From 1967f712677429f52ac3e8896b5ecfecc2372d95 Mon Sep 17 00:00:00 2001 From: Zbigniew Lukwinski Date: Fri, 31 Jul 2020 21:37:16 +0200 Subject: hwmon: (core) Add support for rated attributes Adding implementation for new attributes (rated_min/rated_max) for currentX, inX, powerX, tempX and humidityX. Tested with OpenBMC stack and simple hwmon driver using rated_min/rated_max for the following types of sensors: hwmon_temp, hwmon_in, hwmon_curr, hwmon_power, hwmon_humidity. For each sensor rated attributes were available and returned expected values. Signed-off-by: Zbigniew Lukwinski Link: https://lore.kernel.org/r/1596224237-32280-3-git-send-email-zbigniew.lukwinski@linux.intel.com Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 363d4a814aa1..1e8d6ea8992e 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -85,6 +85,8 @@ enum hwmon_temp_attributes { hwmon_temp_lowest, hwmon_temp_highest, hwmon_temp_reset_history, + hwmon_temp_rated_min, + hwmon_temp_rated_max, }; #define HWMON_T_ENABLE BIT(hwmon_temp_enable) @@ -112,6 +114,8 @@ enum hwmon_temp_attributes { #define HWMON_T_LOWEST BIT(hwmon_temp_lowest) #define HWMON_T_HIGHEST BIT(hwmon_temp_highest) #define HWMON_T_RESET_HISTORY BIT(hwmon_temp_reset_history) +#define HWMON_T_RATED_MIN BIT(hwmon_temp_rated_min) +#define HWMON_T_RATED_MAX BIT(hwmon_temp_rated_max) enum hwmon_in_attributes { hwmon_in_enable, @@ -130,6 +134,8 @@ enum hwmon_in_attributes { hwmon_in_max_alarm, hwmon_in_lcrit_alarm, hwmon_in_crit_alarm, + hwmon_in_rated_min, + hwmon_in_rated_max, }; #define HWMON_I_ENABLE BIT(hwmon_in_enable) @@ -148,6 +154,8 @@ enum hwmon_in_attributes { #define HWMON_I_MAX_ALARM BIT(hwmon_in_max_alarm) #define HWMON_I_LCRIT_ALARM BIT(hwmon_in_lcrit_alarm) #define HWMON_I_CRIT_ALARM BIT(hwmon_in_crit_alarm) +#define HWMON_I_RATED_MIN BIT(hwmon_in_rated_min) +#define HWMON_I_RATED_MAX BIT(hwmon_in_rated_max) enum hwmon_curr_attributes { hwmon_curr_enable, @@ -166,6 +174,8 @@ enum hwmon_curr_attributes { hwmon_curr_max_alarm, hwmon_curr_lcrit_alarm, hwmon_curr_crit_alarm, + hwmon_curr_rated_min, + hwmon_curr_rated_max, }; #define HWMON_C_ENABLE BIT(hwmon_curr_enable) @@ -184,6 +194,8 @@ enum hwmon_curr_attributes { #define HWMON_C_MAX_ALARM BIT(hwmon_curr_max_alarm) #define HWMON_C_LCRIT_ALARM BIT(hwmon_curr_lcrit_alarm) #define HWMON_C_CRIT_ALARM BIT(hwmon_curr_crit_alarm) +#define HWMON_C_RATED_MIN BIT(hwmon_curr_rated_min) +#define HWMON_C_RATED_MAX BIT(hwmon_curr_rated_max) enum hwmon_power_attributes { hwmon_power_enable, @@ -215,6 +227,8 @@ enum hwmon_power_attributes { hwmon_power_max_alarm, hwmon_power_lcrit_alarm, hwmon_power_crit_alarm, + hwmon_power_rated_min, + hwmon_power_rated_max, }; #define HWMON_P_ENABLE BIT(hwmon_power_enable) @@ -246,6 +260,8 @@ enum hwmon_power_attributes { #define HWMON_P_MAX_ALARM BIT(hwmon_power_max_alarm) #define HWMON_P_LCRIT_ALARM BIT(hwmon_power_lcrit_alarm) #define HWMON_P_CRIT_ALARM BIT(hwmon_power_crit_alarm) +#define HWMON_P_RATED_MIN BIT(hwmon_power_rated_min) +#define HWMON_P_RATED_MAX BIT(hwmon_power_rated_max) enum hwmon_energy_attributes { hwmon_energy_enable, @@ -267,6 +283,8 @@ enum hwmon_humidity_attributes { hwmon_humidity_max_hyst, hwmon_humidity_alarm, hwmon_humidity_fault, + hwmon_humidity_rated_min, + hwmon_humidity_rated_max, }; #define HWMON_H_ENABLE BIT(hwmon_humidity_enable) @@ -278,6 +296,8 @@ enum hwmon_humidity_attributes { #define HWMON_H_MAX_HYST BIT(hwmon_humidity_max_hyst) #define HWMON_H_ALARM BIT(hwmon_humidity_alarm) #define HWMON_H_FAULT BIT(hwmon_humidity_fault) +#define HWMON_H_RATED_MIN BIT(hwmon_humidity_rated_min) +#define HWMON_H_RATED_MAX BIT(hwmon_humidity_rated_max) enum hwmon_fan_attributes { hwmon_fan_enable, -- cgit v1.2.3 From 7497d4a66c596fc8312cafe1b8d1e76ad2bc34c3 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Thu, 27 Aug 2020 13:04:54 -0700 Subject: hwmon: (gsc-hwmon) add fan sensor Add a fan sensor to report RPM's from a fan tach input. Signed-off-by: Tim Harvey Signed-off-by: Guenter Roeck --- include/linux/platform_data/gsc_hwmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h index 37a8f554da00..281f499eda97 100644 --- a/include/linux/platform_data/gsc_hwmon.h +++ b/include/linux/platform_data/gsc_hwmon.h @@ -7,6 +7,7 @@ enum gsc_hwmon_mode { mode_voltage_24bit, mode_voltage_raw, mode_voltage_16bit, + mode_fan, mode_max, }; -- cgit v1.2.3 From 38430f0876fa8b9549ec434f569dce03e057c076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:45 +0200 Subject: block: move the NEED_PART_SCAN flag to struct gendisk We can only scan for partitions on the whole disk, so move the flag from struct block_device to struct gendisk. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +--- include/linux/genhd.h | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6ffa783e1633..eb20e28184ab 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -19,8 +19,6 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; -#define BDEV_NEED_PART_SCAN 0 - struct block_device { dev_t bd_dev; int bd_openers; @@ -39,7 +37,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - unsigned long bd_flags; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1c97cf84f011..38f23d757013 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -191,6 +191,8 @@ struct gendisk { void *private_data; int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 struct rw_semaphore lookup_sem; struct kobject *slave_dir; -- cgit v1.2.3 From bb3247a399801ebba20bef101c89e563f5fe7f02 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:55 +0200 Subject: PM: rewrite is_hibernate_resume_dev to not require an inode Just check the dev_t to help simplifying the code. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jens Axboe --- include/linux/suspend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index cb9afad82a90..8af13ba60c7e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -473,9 +473,9 @@ static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) { #endif /* CONFIG_HIBERNATION */ #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV -int is_hibernate_resume_dev(const struct inode *); +int is_hibernate_resume_dev(dev_t dev); #else -static inline int is_hibernate_resume_dev(const struct inode *i) { return 0; } +static inline int is_hibernate_resume_dev(dev_t dev) { return 0; } #endif /* Hibernation and suspend events */ -- cgit v1.2.3 From 21bd900572f3708e281ea25f051fc92462eb1193 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:56 +0200 Subject: mm: split swap_type_of swap_type_of is used for two entirely different purposes: (1) check what swap type a given device/offset corresponds to (2) find the first available swap device that can be written to Mixing both in a single function creates an unreadable mess. Create two separate functions instead, and switch both to pass a dev_t instead of a struct block_device to further simplify the code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 661046994db4..4340a7b6e7a1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -467,7 +467,8 @@ extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); -extern int swap_type_of(dev_t, sector_t, struct block_device **); +int swap_type_of(dev_t device, sector_t offset); +int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); -- cgit v1.2.3 From 1fb1a2ad75e33e646d33e42b9ed17d879d472859 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:58 +0200 Subject: block: mark blkdev_get static There are no users outside the core block code left now. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6e19a7aa1672..be5ef6f4ba19 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1981,7 +1981,6 @@ void blkdev_show(struct seq_file *seqf, off_t offset); #define BLKDEV_MAJOR_MAX 0 #endif -int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -- cgit v1.2.3 From e1ac11859a057ddcf7d6219bd090c7483541767d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 22 Sep 2020 10:30:15 +0300 Subject: net: bridge: add src field to br_ip Add a new src field to struct br_ip which will be used to lookup S, G entries. When SSM option is added we will enable full br_ip lookups. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 6479a38e52fa..4fb9c4954f3a 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -18,6 +18,12 @@ struct br_ip { __be32 ip4; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ip6; +#endif + } src; + union { + __be32 ip4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr ip6; #endif } u; __be16 proto; -- cgit v1.2.3 From eab3227b1240bdcc06c0a01a3fc5bfd2bc12f406 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 22 Sep 2020 10:30:17 +0300 Subject: net: bridge: mcast: rename br_ip's u member to dst Since now we have src in br_ip, u no longer makes sense so rename it to dst. No functional changes. v2: fix build with CONFIG_BATMAN_ADV_MCAST CC: Marek Lindner CC: Simon Wunderlich CC: Antonio Quartulli CC: Sven Eckelmann CC: b.a.t.m.a.n@lists.open-mesh.org Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 4fb9c4954f3a..556caed00258 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -25,7 +25,7 @@ struct br_ip { #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ip6; #endif - } u; + } dst; __be16 proto; __u16 vid; }; -- cgit v1.2.3 From 39097ab66dbe89b16438dd6d178d49e75cb922ed Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 23 Sep 2020 00:29:02 +0200 Subject: net: phy: Fixup kernel doc Add missing parameter documentation, or fixup wrong parameter names. Reviewed-by: Florian Fainelli Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio.h | 3 ++- include/linux/phy.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3a88b699b758..dbd69b3d170b 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -306,7 +306,7 @@ static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising) /** * mii_10gbt_stat_mod_linkmode_lpa_t * @advertising: target the linkmode advertisement settings - * @adv: value of the C45 10GBASE-T AN STATUS register + * @lpa: value of the C45 10GBASE-T AN STATUS register * * A small helper function that translates C45 10GBASE-T AN STATUS register bits * to linkmode advertisement settings. Other bits in advertising aren't changed. @@ -371,6 +371,7 @@ struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); /** * mdio_module_driver() - Helper macro for registering mdio drivers + * @_mdio_driver: driver to register * * Helper macro for MDIO drivers which do not do anything special in module * init/exit. Each module may only use this macro once, and calling it diff --git a/include/linux/phy.h b/include/linux/phy.h index 3a09d2bf69ea..4901b2637059 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1566,8 +1566,9 @@ static inline int mdiobus_register_board_info(const struct mdio_board_info *i, /** - * module_phy_driver() - Helper macro for registering PHY drivers + * phy_module_driver() - Helper macro for registering PHY drivers * @__phy_drivers: array of PHY drivers to register + * @__count: Numbers of members in array * * Helper macro for PHY drivers which do not do anything special in module * init/exit. Each module may only use this macro once, and calling it -- cgit v1.2.3 From 4069a572d423b73919ae40a500dfc4b10f8a6f32 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 23 Sep 2020 00:29:03 +0200 Subject: net: phy: Document core PHY structures Add kerneldoc for the core PHY data structures, a few inline functions and exported functions which are not already documented. v2 Typos g/phy/PHY/s Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 423 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 292 insertions(+), 131 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4901b2637059..eb3cb1a98b45 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -82,7 +82,39 @@ extern const int phy_10gbit_features_array[1]; #define PHY_POLL_CABLE_TEST 0x00000004 #define MDIO_DEVICE_IS_PHY 0x80000000 -/* Interface Mode definitions */ +/** + * enum phy_interface_t - Interface Mode definitions + * + * @PHY_INTERFACE_MODE_NA: Not Applicable - don't touch + * @PHY_INTERFACE_MODE_INTERNAL: No interface, MAC and PHY combined + * @PHY_INTERFACE_MODE_MII: Median-independent interface + * @PHY_INTERFACE_MODE_GMII: Gigabit median-independent interface + * @PHY_INTERFACE_MODE_SGMII: Serial gigabit media-independent interface + * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface + * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface + * @PHY_INTERFACE_MODE_RMII: Reduced Media Independent Interface + * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface + * @PHY_INTERFACE_MODE_RGMII_ID: RGMII with Internal RX+TX delay + * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay + * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay + * @PHY_INTERFACE_MODE_RTBI: Reduced TBI + * @PHY_INTERFACE_MODE_SMII: ??? MII + * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface + * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface + * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax + * @PHY_INTERFACE_MODE_QSGMII: Quad SGMII + * @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII + * @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX + * @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX + * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI + * @PHY_INTERFACE_MODE_XAUI: 10 Gigabit Attachment Unit Interface + * @PHY_INTERFACE_MODE_10GBASER: 10G BaseR + * @PHY_INTERFACE_MODE_USXGMII: Universal Serial 10GE MII + * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN + * @PHY_INTERFACE_MODE_MAX: Book keeping + * + * Describes the interface between the MAC and PHY. + */ typedef enum { PHY_INTERFACE_MODE_NA, PHY_INTERFACE_MODE_INTERNAL, @@ -116,8 +148,8 @@ typedef enum { } phy_interface_t; /** - * phy_supported_speeds - return all speeds currently supported by a phy device - * @phy: The phy device to return supported speeds of. + * phy_supported_speeds - return all speeds currently supported by a PHY device + * @phy: The PHY device to return supported speeds of. * @speeds: buffer to store supported speeds in. * @size: size of speeds buffer. * @@ -134,9 +166,9 @@ unsigned int phy_supported_speeds(struct phy_device *phy, * phy_modes - map phy_interface_t enum to device tree binding of phy-mode * @interface: enum phy_interface_t value * - * Description: maps 'enum phy_interface_t' defined in this file + * Description: maps enum &phy_interface_t defined in this file * into the device tree binding of 'phy-mode', so that Ethernet - * device driver can get phy interface from device tree. + * device driver can get PHY interface from device tree. */ static inline const char *phy_modes(phy_interface_t interface) { @@ -215,6 +247,14 @@ struct sfp_bus; struct sfp_upstream_ops; struct sk_buff; +/** + * struct mdio_bus_stats - Statistics counters for MDIO busses + * @transfers: Total number of transfers, i.e. @writes + @reads + * @errors: Number of MDIO transfers that returned an error + * @writes: Number of write transfers + * @reads: Number of read transfers + * @syncp: Synchronisation for incrementing statistics + */ struct mdio_bus_stats { u64_stats_t transfers; u64_stats_t errors; @@ -224,7 +264,15 @@ struct mdio_bus_stats { struct u64_stats_sync syncp; }; -/* Represents a shared structure between different phydev's in the same +/** + * struct phy_package_shared - Shared information in PHY packages + * @addr: Common PHY address used to combine PHYs in one package + * @refcnt: Number of PHYs connected to this shared data + * @flags: Initialization of PHY package + * @priv_size: Size of the shared private data @priv + * @priv: Driver private data shared across a PHY package + * + * Represents a shared structure between different phydev's in the same * package, for example a quad PHY. See phy_package_join() and * phy_package_leave(). */ @@ -247,7 +295,14 @@ struct phy_package_shared { #define PHY_SHARED_F_INIT_DONE 0 #define PHY_SHARED_F_PROBE_DONE 1 -/* +/** + * struct mii_bus - Represents an MDIO bus + * + * @owner: Who owns this device + * @name: User friendly name for this MDIO device, or driver name + * @id: Unique identifier for this bus, typical from bus hierarchy + * @priv: Driver private data + * * The Bus class for PHYs. Devices which provide access to * PHYs should register using this structure */ @@ -256,49 +311,58 @@ struct mii_bus { const char *name; char id[MII_BUS_ID_SIZE]; void *priv; + /** @read: Perform a read transfer on the bus */ int (*read)(struct mii_bus *bus, int addr, int regnum); + /** @write: Perform a write transfer on the bus */ int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val); + /** @reset: Perform a reset of the bus */ int (*reset)(struct mii_bus *bus); + + /** @stats: Statistic counters per device on the bus */ struct mdio_bus_stats stats[PHY_MAX_ADDR]; - /* - * A lock to ensure that only one thing can read/write + /** + * @mdio_lock: A lock to ensure that only one thing can read/write * the MDIO bus at a time */ struct mutex mdio_lock; + /** @parent: Parent device of this bus */ struct device *parent; + /** @state: State of bus structure */ enum { MDIOBUS_ALLOCATED = 1, MDIOBUS_REGISTERED, MDIOBUS_UNREGISTERED, MDIOBUS_RELEASED, } state; + + /** @dev: Kernel device representation */ struct device dev; - /* list of all PHYs on bus */ + /** @mdio_map: list of all MDIO devices on bus */ struct mdio_device *mdio_map[PHY_MAX_ADDR]; - /* PHY addresses to be ignored when probing */ + /** @phy_mask: PHY addresses to be ignored when probing */ u32 phy_mask; - /* PHY addresses to ignore the TA/read failure */ + /** @phy_ignore_ta_mask: PHY addresses to ignore the TA/read failure */ u32 phy_ignore_ta_mask; - /* - * An array of interrupts, each PHY's interrupt at the index + /** + * @irq: An array of interrupts, each PHY's interrupt at the index * matching its address */ int irq[PHY_MAX_ADDR]; - /* GPIO reset pulse width in microseconds */ + /** @reset_delay_us: GPIO reset pulse width in microseconds */ int reset_delay_us; - /* GPIO reset deassert delay in microseconds */ + /** @reset_post_delay_us: GPIO reset deassert delay in microseconds */ int reset_post_delay_us; - /* RESET GPIO descriptor pointer */ + /** @reset_gpiod: Reset GPIO descriptor pointer */ struct gpio_desc *reset_gpiod; - /* bus capabilities, used for probing */ + /** @probe_capabilities: bus capabilities, used for probing */ enum { MDIOBUS_NO_CAP = 0, MDIOBUS_C22, @@ -306,15 +370,22 @@ struct mii_bus { MDIOBUS_C22_C45, } probe_capabilities; - /* protect access to the shared element */ + /** @shared_lock: protect access to the shared element */ struct mutex shared_lock; - /* shared state across different PHYs */ + /** @shared: shared state across different PHYs */ struct phy_package_shared *shared[PHY_MAX_ADDR]; }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) -struct mii_bus *mdiobus_alloc_size(size_t); +struct mii_bus *mdiobus_alloc_size(size_t size); + +/** + * mdiobus_alloc - Allocate an MDIO bus structure + * + * The internal state of the MDIO bus will be set of MDIOBUS_ALLOCATED ready + * for the driver to register the bus. + */ static inline struct mii_bus *mdiobus_alloc(void) { return mdiobus_alloc_size(0); @@ -341,40 +412,41 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); #define PHY_INTERRUPT_DISABLED false #define PHY_INTERRUPT_ENABLED true -/* PHY state machine states: +/** + * enum phy_state - PHY state machine states: * - * DOWN: PHY device and driver are not ready for anything. probe + * @PHY_DOWN: PHY device and driver are not ready for anything. probe * should be called if and only if the PHY is in this state, * given that the PHY device exists. - * - PHY driver probe function will set the state to READY + * - PHY driver probe function will set the state to @PHY_READY * - * READY: PHY is ready to send and receive packets, but the + * @PHY_READY: PHY is ready to send and receive packets, but the * controller is not. By default, PHYs which do not implement * probe will be set to this state by phy_probe(). * - start will set the state to UP * - * UP: The PHY and attached device are ready to do work. + * @PHY_UP: The PHY and attached device are ready to do work. * Interrupts should be started here. - * - timer moves to NOLINK or RUNNING + * - timer moves to @PHY_NOLINK or @PHY_RUNNING * - * NOLINK: PHY is up, but not currently plugged in. - * - irq or timer will set RUNNING if link comes back - * - phy_stop moves to HALTED + * @PHY_NOLINK: PHY is up, but not currently plugged in. + * - irq or timer will set @PHY_RUNNING if link comes back + * - phy_stop moves to @PHY_HALTED * - * RUNNING: PHY is currently up, running, and possibly sending + * @PHY_RUNNING: PHY is currently up, running, and possibly sending * and/or receiving packets - * - irq or timer will set NOLINK if link goes down - * - phy_stop moves to HALTED + * - irq or timer will set @PHY_NOLINK if link goes down + * - phy_stop moves to @PHY_HALTED * - * CABLETEST: PHY is performing a cable test. Packet reception/sending + * @PHY_CABLETEST: PHY is performing a cable test. Packet reception/sending * is not expected to work, carrier will be indicated as down. PHY will be * poll once per second, or on interrupt for it current state. * Once complete, move to UP to restart the PHY. - * - phy_stop aborts the running test and moves to HALTED + * - phy_stop aborts the running test and moves to @PHY_HALTED * - * HALTED: PHY is up, but no polling or interrupts are done. Or + * @PHY_HALTED: PHY is up, but no polling or interrupts are done. Or * PHY is in an error state. - * - phy_start moves to UP + * - phy_start moves to @PHY_UP */ enum phy_state { PHY_DOWN = 0, @@ -403,34 +475,67 @@ struct phy_c45_device_ids { struct macsec_context; struct macsec_ops; -/* phy_device: An instance of a PHY +/** + * struct phy_device - An instance of a PHY * - * drv: Pointer to the driver for this PHY instance - * phy_id: UID for this device found during discovery - * c45_ids: 802.3-c45 Device Identifers if is_c45. - * is_c45: Set to true if this phy uses clause 45 addressing. - * is_internal: Set to true if this phy is internal to a MAC. - * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc. - * is_gigabit_capable: Set to true if PHY supports 1000Mbps - * has_fixups: Set to true if this phy has fixups/quirks. - * suspended: Set to true if this phy has been suspended successfully. - * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. - * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. - * loopback_enabled: Set true if this phy has been loopbacked successfully. - * downshifted_rate: Set true if link speed has been downshifted. - * state: state of the PHY for management purposes - * dev_flags: Device-specific flags used by the PHY driver. - * irq: IRQ number of the PHY's interrupt (-1 if none) - * phy_timer: The timer for handling the state machine - * sfp_bus_attached: flag indicating whether the SFP bus has been attached - * sfp_bus: SFP bus attached to this PHY's fiber port - * attached_dev: The attached enet driver's device instance ptr - * adjust_link: Callback for the enet controller to respond to - * changes in the link state. - * macsec_ops: MACsec offloading ops. + * @mdio: MDIO bus this PHY is on + * @drv: Pointer to the driver for this PHY instance + * @phy_id: UID for this device found during discovery + * @c45_ids: 802.3-c45 Device Identifiers if is_c45. + * @is_c45: Set to true if this PHY uses clause 45 addressing. + * @is_internal: Set to true if this PHY is internal to a MAC. + * @is_pseudo_fixed_link: Set to true if this PHY is an Ethernet switch, etc. + * @is_gigabit_capable: Set to true if PHY supports 1000Mbps + * @has_fixups: Set to true if this PHY has fixups/quirks. + * @suspended: Set to true if this PHY has been suspended successfully. + * @suspended_by_mdio_bus: Set to true if this PHY was suspended by MDIO bus. + * @sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. + * @loopback_enabled: Set true if this PHY has been loopbacked successfully. + * @downshifted_rate: Set true if link speed has been downshifted. + * @state: State of the PHY for management purposes + * @dev_flags: Device-specific flags used by the PHY driver. + * @irq: IRQ number of the PHY's interrupt (-1 if none) + * @phy_timer: The timer for handling the state machine + * @phylink: Pointer to phylink instance for this PHY + * @sfp_bus_attached: Flag indicating whether the SFP bus has been attached + * @sfp_bus: SFP bus attached to this PHY's fiber port + * @attached_dev: The attached enet driver's device instance ptr + * @adjust_link: Callback for the enet controller to respond to changes: in the + * link state. + * @phy_link_change: Callback for phylink for notification of link change + * @macsec_ops: MACsec offloading ops. * - * speed, duplex, pause, supported, advertising, lp_advertising, - * and autoneg are used like in mii_if_info + * @speed: Current link speed + * @duplex: Current duplex + * @pause: Current pause + * @asym_pause: Current asymmetric pause + * @supported: Combined MAC/PHY supported linkmodes + * @advertising: Currently advertised linkmodes + * @adv_old: Saved advertised while power saving for WoL + * @lp_advertising: Current link partner advertised linkmodes + * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited + * @autoneg: Flag autoneg being used + * @link: Current link state + * @autoneg_complete: Flag auto negotiation of the link has completed + * @mdix: Current crossover + * @mdix_ctrl: User setting of crossover + * @interrupts: Flag interrupts have been enabled + * @interface: enum phy_interface_t value + * @skb: Netlink message for cable diagnostics + * @nest: Netlink nest used for cable diagnostics + * @ehdr: nNtlink header for cable diagnostics + * @phy_led_triggers: Array of LED triggers + * @phy_num_led_triggers: Number of triggers in @phy_led_triggers + * @led_link_trigger: LED trigger for link up/down + * @last_triggered: last LED trigger for link speed + * @master_slave_set: User requested master/slave configuration + * @master_slave_get: Current master/slave advertisement + * @master_slave_state: Current master/slave configuration + * @mii_ts: Pointer to time stamper callbacks + * @lock: Mutex for serialization access to PHY + * @state_queue: Work queue for state machine + * @shared: Pointer to private data shared by phys in one package + * @priv: Pointer to driver private data * * interrupts currently only supports enabled or disabled, * but could be changed in the future to support enabling @@ -550,9 +655,18 @@ struct phy_device { #define to_phy_device(d) container_of(to_mdio_device(d), \ struct phy_device, mdio) -/* A structure containing possible configuration parameters +/** + * struct phy_tdr_config - Configuration of a TDR raw test + * + * @first: Distance for first data collection point + * @last: Distance for last data collection point + * @step: Step between data collection points + * @pair: Bitmap of cable pairs to collect data for + * + * A structure containing possible configuration parameters * for a TDR cable test. The driver does not need to implement * all the parameters, but should report what is actually used. + * All distances are in centimeters. */ struct phy_tdr_config { u32 first; @@ -562,18 +676,20 @@ struct phy_tdr_config { }; #define PHY_PAIR_ALL -1 -/* struct phy_driver: Driver structure for a particular PHY type +/** + * struct phy_driver - Driver structure for a particular PHY type * - * driver_data: static driver data - * phy_id: The result of reading the UID registers of this PHY + * @mdiodrv: Data common to all MDIO devices + * @phy_id: The result of reading the UID registers of this PHY * type, and ANDing them with the phy_id_mask. This driver * only works for PHYs with IDs which match this field - * name: The friendly name of this PHY type - * phy_id_mask: Defines the important bits of the phy_id - * features: A mandatory list of features (speed, duplex, etc) + * @name: The friendly name of this PHY type + * @phy_id_mask: Defines the important bits of the phy_id + * @features: A mandatory list of features (speed, duplex, etc) * supported by this PHY - * flags: A bitfield defining certain other features this PHY + * @flags: A bitfield defining certain other features this PHY * supports (like interrupts) + * @driver_data: Static driver data * * All functions are optional. If config_aneg or read_status * are not implemented, the phy core uses the genphy versions. @@ -592,151 +708,178 @@ struct phy_driver { u32 flags; const void *driver_data; - /* - * Called to issue a PHY software reset + /** + * @soft_reset: Called to issue a PHY software reset */ int (*soft_reset)(struct phy_device *phydev); - /* - * Called to initialize the PHY, + /** + * @config_init: Called to initialize the PHY, * including after a reset */ int (*config_init)(struct phy_device *phydev); - /* - * Called during discovery. Used to set + /** + * @probe: Called during discovery. Used to set * up device-specific structures, if any */ int (*probe)(struct phy_device *phydev); - /* - * Probe the hardware to determine what abilities it has. - * Should only set phydev->supported. + /** + * @get_features: Probe the hardware to determine what + * abilities it has. Should only set phydev->supported. */ int (*get_features)(struct phy_device *phydev); /* PHY Power Management */ + /** @suspend: Suspend the hardware, saving state if needed */ int (*suspend)(struct phy_device *phydev); + /** @resume: Resume the hardware, restoring state if needed */ int (*resume)(struct phy_device *phydev); - /* - * Configures the advertisement and resets + /** + * @config_aneg: Configures the advertisement and resets * autonegotiation if phydev->autoneg is on, * forces the speed to the current settings in phydev * if phydev->autoneg is off */ int (*config_aneg)(struct phy_device *phydev); - /* Determines the auto negotiation result */ + /** @aneg_done: Determines the auto negotiation result */ int (*aneg_done)(struct phy_device *phydev); - /* Determines the negotiated speed and duplex */ + /** @read_status: Determines the negotiated speed and duplex */ int (*read_status)(struct phy_device *phydev); - /* Clears any pending interrupts */ + /** @ack_interrupt: Clears any pending interrupts */ int (*ack_interrupt)(struct phy_device *phydev); - /* Enables or disables interrupts */ + /** @config_intr: Enables or disables interrupts */ int (*config_intr)(struct phy_device *phydev); - /* - * Checks if the PHY generated an interrupt. + /** + * @did_interrupt: Checks if the PHY generated an interrupt. * For multi-PHY devices with shared PHY interrupt pin * Set interrupt bits have to be cleared. */ int (*did_interrupt)(struct phy_device *phydev); - /* Override default interrupt handling */ + /** @handle_interrupt: Override default interrupt handling */ irqreturn_t (*handle_interrupt)(struct phy_device *phydev); - /* Clears up any memory if needed */ + /** @remove: Clears up any memory if needed */ void (*remove)(struct phy_device *phydev); - /* Returns true if this is a suitable driver for the given - * phydev. If NULL, matching is based on phy_id and - * phy_id_mask. + /** + * @match_phy_device: Returns true if this is a suitable + * driver for the given phydev. If NULL, matching is based on + * phy_id and phy_id_mask. */ int (*match_phy_device)(struct phy_device *phydev); - /* Some devices (e.g. qnap TS-119P II) require PHY register changes to - * enable Wake on LAN, so set_wol is provided to be called in the - * ethernet driver's set_wol function. */ + /** + * @set_wol: Some devices (e.g. qnap TS-119P II) require PHY + * register changes to enable Wake on LAN, so set_wol is + * provided to be called in the ethernet driver's set_wol + * function. + */ int (*set_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); - /* See set_wol, but for checking whether Wake on LAN is enabled. */ + /** + * @get_wol: See set_wol, but for checking whether Wake on LAN + * is enabled. + */ void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); - /* - * Called to inform a PHY device driver when the core is about to - * change the link state. This callback is supposed to be used as - * fixup hook for drivers that need to take action when the link - * state changes. Drivers are by no means allowed to mess with the + /** + * @link_change_notify: Called to inform a PHY device driver + * when the core is about to change the link state. This + * callback is supposed to be used as fixup hook for drivers + * that need to take action when the link state + * changes. Drivers are by no means allowed to mess with the * PHY device structure in their implementations. */ void (*link_change_notify)(struct phy_device *dev); - /* - * Phy specific driver override for reading a MMD register. - * This function is optional for PHY specific drivers. When - * not provided, the default MMD read function will be used - * by phy_read_mmd(), which will use either a direct read for - * Clause 45 PHYs or an indirect read for Clause 22 PHYs. - * devnum is the MMD device number within the PHY device, - * regnum is the register within the selected MMD device. + /** + * @read_mmd: PHY specific driver override for reading a MMD + * register. This function is optional for PHY specific + * drivers. When not provided, the default MMD read function + * will be used by phy_read_mmd(), which will use either a + * direct read for Clause 45 PHYs or an indirect read for + * Clause 22 PHYs. devnum is the MMD device number within the + * PHY device, regnum is the register within the selected MMD + * device. */ int (*read_mmd)(struct phy_device *dev, int devnum, u16 regnum); - /* - * Phy specific driver override for writing a MMD register. - * This function is optional for PHY specific drivers. When - * not provided, the default MMD write function will be used - * by phy_write_mmd(), which will use either a direct write for - * Clause 45 PHYs, or an indirect write for Clause 22 PHYs. - * devnum is the MMD device number within the PHY device, - * regnum is the register within the selected MMD device. - * val is the value to be written. + /** + * @write_mmd: PHY specific driver override for writing a MMD + * register. This function is optional for PHY specific + * drivers. When not provided, the default MMD write function + * will be used by phy_write_mmd(), which will use either a + * direct write for Clause 45 PHYs, or an indirect write for + * Clause 22 PHYs. devnum is the MMD device number within the + * PHY device, regnum is the register within the selected MMD + * device. val is the value to be written. */ int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum, u16 val); + /** @read_page: Return the current PHY register page number */ int (*read_page)(struct phy_device *dev); + /** @write_page: Set the current PHY register page number */ int (*write_page)(struct phy_device *dev, int page); - /* Get the size and type of the eeprom contained within a plug-in - * module */ + /** + * @module_info: Get the size and type of the eeprom contained + * within a plug-in module + */ int (*module_info)(struct phy_device *dev, struct ethtool_modinfo *modinfo); - /* Get the eeprom information from the plug-in module */ + /** + * @module_eeprom: Get the eeprom information from the plug-in + * module + */ int (*module_eeprom)(struct phy_device *dev, struct ethtool_eeprom *ee, u8 *data); - /* Start a cable test */ + /** @cable_test_start: Start a cable test */ int (*cable_test_start)(struct phy_device *dev); - /* Start a raw TDR cable test */ + /** @cable_test_tdr_start: Start a raw TDR cable test */ int (*cable_test_tdr_start)(struct phy_device *dev, const struct phy_tdr_config *config); - /* Once per second, or on interrupt, request the status of the - * test. + /** + * @cable_test_get_status: Once per second, or on interrupt, + * request the status of the test. */ int (*cable_test_get_status)(struct phy_device *dev, bool *finished); - /* Get statistics from the phy using ethtool */ + /* Get statistics from the PHY using ethtool */ + /** @get_sset_count: Number of statistic counters */ int (*get_sset_count)(struct phy_device *dev); + /** @get_strings: Names of the statistic counters */ void (*get_strings)(struct phy_device *dev, u8 *data); + /** @get_stats: Return the statistic counter values */ void (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); /* Get and Set PHY tunables */ + /** @get_tunable: Return the value of a tunable */ int (*get_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, void *data); + /** @set_tunable: Set the value of a tunable */ int (*set_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, const void *data); + /** @set_loopback: Set the loopback mood of the PHY */ int (*set_loopback)(struct phy_device *dev, bool enable); + /** @get_sqi: Get the signal quality indication */ int (*get_sqi)(struct phy_device *dev); + /** @get_sqi_max: Get the maximum signal quality indication */ int (*get_sqi_max)(struct phy_device *dev); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ @@ -890,6 +1033,24 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); +/** + * phy_read_mmd_poll_timeout - Periodically poll a PHY register until a + * condition is met or a timeout occurs + * + * @phydev: The phy_device struct + * @devaddr: The MMD to read from + * @regnum: The register on the MMD to read + * @val: Variable to read the register into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.rst). + * @timeout_us: Timeout in us, 0 means never timeout + * @sleep_before_read: if it is true, sleep @sleep_us before read. + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + */ #define phy_read_mmd_poll_timeout(phydev, devaddr, regnum, val, cond, \ sleep_us, timeout_us, sleep_before_read) \ ({ \ @@ -1161,7 +1322,7 @@ static inline bool phy_is_internal(struct phy_device *phydev) /** * phy_interface_mode_is_rgmii - Convenience function for testing if a * PHY interface mode is RGMII (all variants) - * @mode: the phy_interface_t enum + * @mode: the &phy_interface_t enum */ static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) { @@ -1170,11 +1331,11 @@ static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) }; /** - * phy_interface_mode_is_8023z() - does the phy interface mode use 802.3z + * phy_interface_mode_is_8023z() - does the PHY interface mode use 802.3z * negotiation * @mode: one of &enum phy_interface_t * - * Returns true if the phy interface mode uses the 16-bit negotiation + * Returns true if the PHY interface mode uses the 16-bit negotiation * word as defined in 802.3z. (See 802.3-2015 37.2.1 Config_Reg encoding) */ static inline bool phy_interface_mode_is_8023z(phy_interface_t mode) @@ -1193,7 +1354,7 @@ static inline bool phy_interface_is_rgmii(struct phy_device *phydev) return phy_interface_mode_is_rgmii(phydev->interface); }; -/* +/** * phy_is_pseudo_fixed_link - Convenience function for testing if this * PHY is the CPU port facing side of an Ethernet switch, or similar. * @phydev: the phy_device struct -- cgit v1.2.3 From 70fb2612aab62d47e03f82eaa7384a8d30ca175d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Sep 2020 21:26:23 -0700 Subject: fscrypt: don't call no-key names "ciphertext names" Currently we're using the term "ciphertext name" ambiguously because it can mean either the actual ciphertext filename, or the encoded filename that is shown when an encrypted directory is listed without its key. The latter we're now usually calling the "no-key name"; and while it's derived from the ciphertext name, it's not the same thing. To avoid this ambiguity, rename fscrypt_name::is_ciphertext_name to fscrypt_name::is_nokey_name, and update comments that say "ciphertext name" (or "encrypted name") to say "no-key name" instead when warranted. Link: https://lore.kernel.org/r/20200924042624.98439-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index fc67c4cbaa96..bc9ec727e993 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -35,7 +35,7 @@ struct fscrypt_name { u32 hash; u32 minor_hash; struct fscrypt_str crypto_buf; - bool is_ciphertext_name; + bool is_nokey_name; }; #define FSTR_INIT(n, l) { .name = n, .len = l } @@ -730,17 +730,16 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * @fname: (output) the name to use to search the on-disk directory * * Prepare for ->lookup() in a directory which may be encrypted by determining - * the name that will actually be used to search the directory on-disk. Lookups - * can be done with or without the directory's encryption key; without the key, - * filenames are presented in encrypted form. Therefore, we'll try to set up - * the directory's encryption key, but even without it the lookup can continue. + * the name that will actually be used to search the directory on-disk. If the + * directory's encryption key is available, then the lookup is assumed to be by + * plaintext name; otherwise, it is assumed to be by no-key name. * * This also installs a custom ->d_revalidate() method which will invalidate the * dentry if it was created without the key and the key is later added. * - * Return: 0 on success; -ENOENT if key is unavailable but the filename isn't a - * correctly formed encoded ciphertext name, so a negative dentry should be - * created; or another -errno code. + * Return: 0 on success; -ENOENT if the directory's key is unavailable but the + * filename isn't a valid no-key name, so a negative dentry should be created; + * or another -errno code. */ static inline int fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, -- cgit v1.2.3 From 501e43fbea468fa93c1dff0ee744e69303ef5a43 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Sep 2020 21:26:24 -0700 Subject: fscrypt: rename DCACHE_ENCRYPTED_NAME to DCACHE_NOKEY_NAME Originally we used the term "encrypted name" or "ciphertext name" to mean the encoded filename that is shown when an encrypted directory is listed without its key. But these terms are ambiguous since they also mean the filename stored on-disk. "Encrypted name" is especially ambiguous since it could also be understood to mean "this filename is encrypted on-disk", similar to "encrypted file". So we've started calling these encoded names "no-key names" instead. Therefore, rename DCACHE_ENCRYPTED_NAME to DCACHE_NOKEY_NAME to avoid confusion about what this flag means. Link: https://lore.kernel.org/r/20200924042624.98439-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/dcache.h | 2 +- include/linux/fscrypt.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 65d975bf9390..6f95c3300cbb 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -213,7 +213,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ -#define DCACHE_ENCRYPTED_NAME 0x02000000 /* Encrypted name (dir key was unavailable) */ +#define DCACHE_NOKEY_NAME 0x02000000 /* Encrypted name encoded without key */ #define DCACHE_OP_REAL 0x04000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index bc9ec727e993..f1757e73162d 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -100,15 +100,15 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) } /* - * When d_splice_alias() moves a directory's encrypted alias to its decrypted - * alias as a result of the encryption key being added, DCACHE_ENCRYPTED_NAME - * must be cleared. Note that we don't have to support arbitrary moves of this - * flag because fscrypt doesn't allow encrypted aliases to be the source or - * target of a rename(). + * When d_splice_alias() moves a directory's no-key alias to its plaintext alias + * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be + * cleared. Note that we don't have to support arbitrary moves of this flag + * because fscrypt doesn't allow no-key names to be the source or target of a + * rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { - dentry->d_flags &= ~DCACHE_ENCRYPTED_NAME; + dentry->d_flags &= ~DCACHE_NOKEY_NAME; } /* crypto.c */ -- cgit v1.2.3 From fe205d984e7730f4d21f6f8ebc60f0698404ac31 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 18 Aug 2020 22:24:27 +0800 Subject: ACPI: Remove side effect of partly creating a node in acpi_map_pxm_to_online_node() While this function will only return an online node, it can have the side effect of partially creating a new node. The existing comments suggest this is intentional, but the usecases of this function are related to NFIT and HMAT parsing, neither of which should be able to define new nodes. One route by which the existing behaviour would cause a crash is to have a _PXM entry in ACPI DSDT attempt to place a device within this partly created proximity domain. A subsequent call to devm_kzalloc() or similar would result in an attempt to allocate memory on a node for which zone lists have not been set up and a NULL pointer dereference. Prevent such cases by switching to pxm_to_node() within acpi_map_pxm_to_online_node() which cannot cause a new node to be partly created. If one would previously have been created we now return NO_NUMA_NODE. Documentation updated to reflect this change. Signed-off-by: Jonathan Cameron Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1e4cdc6c7ae2..a9fd122ae878 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -430,13 +430,12 @@ int acpi_get_node(acpi_handle handle); * ACPI device drivers, which are called after the NUMA initialization has * completed in the kernel, can call this interface to obtain their device * NUMA topology from ACPI tables. Such drivers do not have to deal with - * offline nodes. A node may be offline when a device proximity ID is - * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. - * "numa=off" on x86. + * offline nodes. A node may be offline when SRAT memory entry does not exist, + * or NUMA is disabled, ex. "numa=off" on x86. */ static inline int acpi_map_pxm_to_online_node(int pxm) { - int node = acpi_map_pxm_to_node(pxm); + int node = pxm_to_node(pxm); return numa_map_to_online_node(node); } -- cgit v1.2.3 From 4eb3723f18e9ba4d4b13d82b6f7e68dd50a852ea Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 18 Aug 2020 22:24:28 +0800 Subject: ACPI: Rename acpi_map_pxm_to_online_node() to pxm_to_online_node() As this function is no longer allowed to create new mappings let us rename it to reflect this. Note all nodes should already exist before any of the users of this function are called. Signed-off-by: Jonathan Cameron Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a9fd122ae878..e9f6cd67943e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -420,10 +420,10 @@ int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); /** - * acpi_map_pxm_to_online_node - Map proximity ID to online node + * pxm_to_online_node - Map proximity ID to online node * @pxm: ACPI proximity ID * - * This is similar to acpi_map_pxm_to_node(), but always returns an online + * This is similar to pxm_to_node(), but always returns an online * node. When the mapped node from a given proximity ID is offline, it * looks up the node distance table and returns the nearest online node. * @@ -433,14 +433,14 @@ int acpi_get_node(acpi_handle handle); * offline nodes. A node may be offline when SRAT memory entry does not exist, * or NUMA is disabled, ex. "numa=off" on x86. */ -static inline int acpi_map_pxm_to_online_node(int pxm) +static inline int pxm_to_online_node(int pxm) { int node = pxm_to_node(pxm); return numa_map_to_online_node(node); } #else -static inline int acpi_map_pxm_to_online_node(int pxm) +static inline int pxm_to_online_node(int pxm) { return 0; } -- cgit v1.2.3 From 76bd5c016ef49683d626a48748ef1764aaf8ba63 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 14 Sep 2020 17:05:08 -0400 Subject: NFSv4: make cache consistency bitmask dynamic Client uses static bitmask for GETATTR on CLOSE/WRITE/DELEGRETURN and ignores the fact that it might have some attributes marked invalid in its cache. Compared to v3 where all attributes are retrieved in postop attributes, v4's cache is frequently out of sync and leads to standalone GETATTRs being sent to the server. Instead, in addition to the minimum cache consistency attributes also check cache_validity and adjust the GETATTR request accordingly. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 69cb46f7b8d2..0599efd57eb9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -525,7 +525,7 @@ struct nfs_closeargs { struct nfs_seqid * seqid; fmode_t fmode; u32 share_access; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -608,7 +608,7 @@ struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; - const u32 * bitmask; + u32 * bitmask; struct nfs4_layoutreturn_args *lr_args; }; @@ -648,7 +648,7 @@ struct nfs_pgio_args { union { unsigned int replen; /* used by read */ struct { - const u32 * bitmask; /* used by write */ + u32 * bitmask; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; }; -- cgit v1.2.3 From 402dd2cf46b177be5bcb138b7d7fd8f38aa130e4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:28 +0200 Subject: fs: remove the unused SB_I_MULTIROOT flag The last user of SB_I_MULTIROOT is disappeared with commit f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..fbd74df5ce5f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,7 +1385,6 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ -#define SB_I_MULTIROOT 0x00000008 /* Multiple roots to the dentry tree */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ -- cgit v1.2.3 From c2e4cd57cfa1f627b786c764d185fff85fd12be9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:34 +0200 Subject: block: lift setting the readahead size into the block layer Drivers shouldn't really mess with the readahead size, as that is a VM concept. Instead set it based on the optimal I/O size by lifting the algorithm from the md driver when registering the disk. Also set bdi->io_pages there as well by applying the same scheme based on max_sectors. To ensure the limits work well for stacking drivers a new helper is added to update the readahead limits from the block limits, which is also called from disk_stack_limits. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Jan Kara Reviewed-by: Mike Snitzer Reviewed-by: Martin K. Petersen Acked-by: Coly Li Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index be5ef6f4ba19..282f5ca424f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1140,6 +1140,7 @@ extern void blk_queue_max_zone_append_sectors(struct request_queue *q, extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); +void blk_queue_update_readahead(struct request_queue *q); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From ed7b6b4f6e915cb0bc52d0000bcc63168867b6ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:35 +0200 Subject: bdi: remove BDI_CAP_CGROUP_WRITEBACK Just checking SB_I_CGROUPWB for cgroup writeback support is enough. Either the file system allocates its own bdi (e.g. btrfs), in which case it is known to support cgroup writeback, or the bdi comes from the block layer, which always supports cgroup writeback. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0b06b2d26c9a..52583b6f2ea0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -123,7 +123,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. * - * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be * inefficient. */ @@ -233,9 +232,9 @@ int inode_congested(struct inode *inode, int cong_bits); * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode * @inode: inode of interest * - * cgroup writeback requires support from both the bdi and filesystem. - * Also, both memcg and iocg have to be on the default hierarchy. Test - * whether all conditions are met. + * Cgroup writeback requires support from the filesystem. Also, both memcg and + * iocg have to be on the default hierarchy. Test whether all conditions are + * met. * * Note that the test result may change dynamically on the same inode * depending on how memcg and iocg are configured. @@ -247,7 +246,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode) return cgroup_subsys_on_dfl(memory_cgrp_subsys) && cgroup_subsys_on_dfl(io_cgrp_subsys) && bdi_cap_account_dirty(bdi) && - (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } -- cgit v1.2.3 From a8b456d01cd6b37191f14248f3e2bdbe5ce3a89e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:36 +0200 Subject: bdi: remove BDI_CAP_SYNCHRONOUS_IO BDI_CAP_SYNCHRONOUS_IO is only checked in the swap code, and used to decided if ->rw_page can be used on a block device. Just check up for the method instead. The only complication is that zram needs a second set of block_device_operations as it can switch between modes that actually support ->rw_page and those who don't. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 52583b6f2ea0..860ea33571bc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -122,9 +122,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_NO_WRITEBACK: Don't write pages back * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. - * - * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be - * inefficient. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 @@ -132,7 +129,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 -#define BDI_CAP_SYNCHRONOUS_IO 0x00000040 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) @@ -174,11 +170,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO; -} - static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) { return bdi->capabilities & BDI_CAP_STABLE_WRITES; -- cgit v1.2.3 From 1cb039f3dc1619eb795c54aad0a98fdb379b4237 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:38 +0200 Subject: bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag The BDI_CAP_STABLE_WRITES is one of the few bits of information in the backing_dev_info shared between the block drivers and the writeback code. To help untangling the dependency replace it with a queue flag and a superblock flag derived from it. This also helps with the case of e.g. a file system requiring stable writes due to its own checksumming, but not forcing it on other users of the block device like the swap code. One downside is that we an't support the stable_pages_required bdi attribute in sysfs anymore. It is replaced with a queue attribute which also is writable for easier testing. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 6 ------ include/linux/blkdev.h | 3 +++ include/linux/fs.h | 1 + 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 860ea33571bc..5da4ea3dd0cc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -126,7 +126,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 #define BDI_CAP_NO_ACCT_WB 0x00000004 -#define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 @@ -170,11 +169,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_STABLE_WRITES; -} - static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 282f5ca424f1..8e77f12de522 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -606,6 +606,7 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ +#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 17 /* Write back caching */ #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ @@ -635,6 +636,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_stable_writes(q) \ + test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) diff --git a/include/linux/fs.h b/include/linux/fs.h index fbd74df5ce5f..222465b7cf41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,6 +1385,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ +#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ -- cgit v1.2.3 From 823423ef55f4d9c470b1edc9c5b5c93d06abfaae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:39 +0200 Subject: bdi: invert BDI_CAP_NO_ACCT_WB Replace BDI_CAP_NO_ACCT_WB with a positive BDI_CAP_WRITEBACK_ACCT to make the checks more obvious. Also remove the pointless bdi_cap_account_writeback wrapper that just obsfucates the check. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5da4ea3dd0cc..b217344a2c63 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -120,17 +120,17 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * * BDI_CAP_NO_ACCT_DIRTY: Dirty pages shouldn't contribute to accounting * BDI_CAP_NO_WRITEBACK: Don't write pages back - * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages + * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 -#define BDI_CAP_NO_ACCT_WB 0x00000004 +#define BDI_CAP_WRITEBACK_ACCT 0x00000004 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ - (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) + (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY) extern struct backing_dev_info noop_backing_dev_info; @@ -179,13 +179,6 @@ static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi) return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY); } -static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) -{ - /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */ - return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB | - BDI_CAP_NO_WRITEBACK)); -} - static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host)); -- cgit v1.2.3 From f56753ac2a90810726334df04d735e9f8f5a32d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:40 +0200 Subject: bdi: replace BDI_CAP_NO_{WRITEBACK,ACCT_DIRTY} with a single flag Replace the two negative flags that are always used together with a single positive flag that indicates the writeback capability instead of two related non-capabilities. Also remove the pointless wrappers to just check the flag. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 48 ++++++++++----------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b217344a2c63..44df4fcef65c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -110,27 +110,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); /* * Flags in backing_dev_info::capability * - * The first three flags control whether dirty pages will contribute to the - * VM's accounting and whether writepages() should be called for dirty pages - * (something that would not, for example, be appropriate for ramfs) - * - * WARNING: these flags are closely related and should not normally be - * used separately. The BDI_CAP_NO_ACCT_AND_WRITEBACK combines these - * three flags into a single convenience macro. - * - * BDI_CAP_NO_ACCT_DIRTY: Dirty pages shouldn't contribute to accounting - * BDI_CAP_NO_WRITEBACK: Don't write pages back - * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages - * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. + * BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages + * should contribute to accounting + * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages + * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold */ -#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 -#define BDI_CAP_NO_WRITEBACK 0x00000002 -#define BDI_CAP_WRITEBACK_ACCT 0x00000004 -#define BDI_CAP_STRICTLIMIT 0x00000010 -#define BDI_CAP_CGROUP_WRITEBACK 0x00000020 - -#define BDI_CAP_NO_ACCT_AND_WRITEBACK \ - (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY) +#define BDI_CAP_WRITEBACK (1 << 0) +#define BDI_CAP_WRITEBACK_ACCT (1 << 1) +#define BDI_CAP_STRICTLIMIT (1 << 2) extern struct backing_dev_info noop_backing_dev_info; @@ -169,24 +156,9 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) -{ - return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); -} - -static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi) -{ - return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY); -} - -static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) -{ - return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host)); -} - -static inline bool mapping_cap_account_dirty(struct address_space *mapping) +static inline bool mapping_can_writeback(struct address_space *mapping) { - return bdi_cap_account_dirty(inode_to_bdi(mapping->host)); + return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; } static inline int bdi_sched_wait(void *word) @@ -223,7 +195,7 @@ static inline bool inode_cgwb_enabled(struct inode *inode) return cgroup_subsys_on_dfl(memory_cgrp_subsys) && cgroup_subsys_on_dfl(io_cgrp_subsys) && - bdi_cap_account_dirty(bdi) && + (bdi->capabilities & BDI_CAP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } -- cgit v1.2.3 From aedcade6f4fa9a1e65f327fc42de3fb47660646c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 14 Aug 2020 17:40:26 -0700 Subject: debugobjects: Allow debug_obj_descr to be const The debugobject core could be slightly harder to corrupt if the debug_obj_descr would be a pointer to const memory. Depending on the architecture, const data structures are placed into read-only memory and thus are harder to corrupt or hijack. This descriptor is used to fix up stuff like timers and workqueues when core kernel data structures are busted, so moving the descriptors to read-only memory will make debugobjects more resilient to something going wrong and then corrupting the function pointers inside struct debug_obj_descr. Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20200815004027.2046113-2-swboyd@chromium.org --- include/linux/debugobjects.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index afc416e5dcab..8d2dde23e9fb 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -30,7 +30,7 @@ struct debug_obj { enum debug_obj_state state; unsigned int astate; void *object; - struct debug_obj_descr *descr; + const struct debug_obj_descr *descr; }; /** @@ -64,14 +64,14 @@ struct debug_obj_descr { }; #ifdef CONFIG_DEBUG_OBJECTS -extern void debug_object_init (void *addr, struct debug_obj_descr *descr); +extern void debug_object_init (void *addr, const struct debug_obj_descr *descr); extern void -debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr); -extern int debug_object_activate (void *addr, struct debug_obj_descr *descr); -extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr); -extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr); -extern void debug_object_free (void *addr, struct debug_obj_descr *descr); -extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr); +debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr); +extern int debug_object_activate (void *addr, const struct debug_obj_descr *descr); +extern void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr); +extern void debug_object_destroy (void *addr, const struct debug_obj_descr *descr); +extern void debug_object_free (void *addr, const struct debug_obj_descr *descr); +extern void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr); /* * Active state: @@ -79,26 +79,26 @@ extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr); * - Must return to 0 before deactivation. */ extern void -debug_object_active_state(void *addr, struct debug_obj_descr *descr, +debug_object_active_state(void *addr, const struct debug_obj_descr *descr, unsigned int expect, unsigned int next); extern void debug_objects_early_init(void); extern void debug_objects_mem_init(void); #else static inline void -debug_object_init (void *addr, struct debug_obj_descr *descr) { } +debug_object_init (void *addr, const struct debug_obj_descr *descr) { } static inline void -debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) { } +debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr) { } static inline int -debug_object_activate (void *addr, struct debug_obj_descr *descr) { return 0; } +debug_object_activate (void *addr, const struct debug_obj_descr *descr) { return 0; } static inline void -debug_object_deactivate(void *addr, struct debug_obj_descr *descr) { } +debug_object_deactivate(void *addr, const struct debug_obj_descr *descr) { } static inline void -debug_object_destroy (void *addr, struct debug_obj_descr *descr) { } +debug_object_destroy (void *addr, const struct debug_obj_descr *descr) { } static inline void -debug_object_free (void *addr, struct debug_obj_descr *descr) { } +debug_object_free (void *addr, const struct debug_obj_descr *descr) { } static inline void -debug_object_assert_init(void *addr, struct debug_obj_descr *descr) { } +debug_object_assert_init(void *addr, const struct debug_obj_descr *descr) { } static inline void debug_objects_early_init(void) { } static inline void debug_objects_mem_init(void) { } -- cgit v1.2.3 From b952caf2d5ca898cc10d63be7722ae7a5daca696 Mon Sep 17 00:00:00 2001 From: Qianli Zhao Date: Thu, 13 Aug 2020 23:03:14 +0800 Subject: timers: Mask invalid flags in do_init_timer() do_init_timer() accepts any combination of timer flags handed in by the caller without a sanity check, but only TIMER_DEFFERABLE, TIMER_PINNED and TIMER_IRQSAFE are valid. If the supplied flags have other bits set, this could result in malfunction. If bits are set in TIMER_CPUMASK the first timer usage could deference a cpu base which is outside the range of possible CPUs. If TIMER_MIGRATION is set, then the switch_timer_base() will live lock. Prevent that with a sanity check which warns when invalid flags are supplied and masks them out. [ tglx: Made it WARN_ON_ONCE() and added context to the changelog ] Signed-off-by: Qianli Zhao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/9d79a8aa4eb56713af7379f99f062dedabcde140.1597326756.git.zhaoqianli@xiaomi.com --- include/linux/timer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 07910ae5ddd9..d10bc7e73b41 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -67,6 +67,7 @@ struct timer_list { #define TIMER_DEFERRABLE 0x00080000 #define TIMER_PINNED 0x00100000 #define TIMER_IRQSAFE 0x00200000 +#define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) #define TIMER_ARRAYSHIFT 22 #define TIMER_ARRAYMASK 0xFFC00000 -- cgit v1.2.3 From 29bcff787a2593b2126cfaff612c0b4e560022e9 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Thu, 20 Aug 2020 09:22:08 -0400 Subject: md/raid5: add new xor function to support different page offset raid5 will call async_xor() and async_xor_val() to compute xor. For now, both of them require the common src/dst page offset. But, we want them to support different src/dst page offset for following shared page. Here, adding two new function async_xor_offs() and async_xor_val_offs() respectively for async_xor() and async_xor_val(). Signed-off-by: Yufen Yu Signed-off-by: Song Liu --- include/linux/async_tx.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 4c328fef403c..8901f3c801ee 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -162,11 +162,22 @@ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_xor_offs(struct page *dest, unsigned int offset, + struct page **src_list, unsigned int *src_offset, + int src_cnt, size_t len, struct async_submit_ctl *submit); + struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_xor_val_offs(struct page *dest, unsigned int offset, + struct page **src_list, unsigned int *src_offset, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit); + struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, unsigned int src_offset, size_t len, -- cgit v1.2.3 From d69454bc9fdfb63e84152e0533454079ea3e38a3 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Thu, 20 Aug 2020 09:22:10 -0400 Subject: md/raid6: let syndrome computor support different page offset For now, syndrome compute functions require common offset in the pages array. However, we expect them to support different offset when try to use shared page in the following. Simplily covert them by adding page offset where each page address are referred. Since the only caller of async_gen_syndrome() and async_syndrome_val() are in raid6, we don't want to reserve the old interface but modify the interface directly. After that, replacing old interfaces with new ones for raid6 and raid6test. Signed-off-by: Yufen Yu Signed-off-by: Song Liu --- include/linux/async_tx.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 8901f3c801ee..efc5510f7d11 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -186,13 +186,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, +async_gen_syndrome(struct page **blocks, unsigned int *offsets, int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, +async_syndrome_val(struct page **blocks, unsigned int *offsets, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, - struct async_submit_ctl *submit); + unsigned int s_off, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, -- cgit v1.2.3 From 4f86ff5580fa692f850f37f948c73814a24a722b Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Thu, 20 Aug 2020 09:22:11 -0400 Subject: md/raid6: let async recovery function support different page offset For now, asynchronous raid6 recovery calculate functions are require common offset for pages. But, we expect them to support different page offset after introducing stripe shared page. Do that by simplily adding page offset where each page address are referred. Then, replace the old interface with the new ones in raid6 and raid6test. Signed-off-by: Yufen Yu Signed-off-by: Song Liu --- include/linux/async_tx.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index efc5510f7d11..5cc73d7e5b52 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -196,11 +196,13 @@ async_syndrome_val(struct page **blocks, unsigned int *offsets, int src_cnt, struct dma_async_tx_descriptor * async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, - struct page **ptrs, struct async_submit_ctl *submit); + struct page **ptrs, unsigned int *offs, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_raid6_datap_recov(int src_num, size_t bytes, int faila, - struct page **ptrs, struct async_submit_ctl *submit); + struct page **ptrs, unsigned int *offs, + struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From b5b6775d72e8662b5ea7f892bf06db4831e2c1aa Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 23 Sep 2020 18:41:22 +0300 Subject: of: add of_mdio_find_device() api Add a helper function which finds the mdio_device structure given a device tree node. This is helpful for finding the PCS device based on a DTS node but managing it as a mdio_device instead of a phy_device. Signed-off-by: Russell King Signed-off-by: Ioana Ciornei Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 1efb88d9f892..cfe8c607a628 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -17,6 +17,7 @@ bool of_mdiobus_child_is_phy(struct device_node *child); int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, struct device_node *np); +struct mdio_device *of_mdio_find_device(struct device_node *np); struct phy_device *of_phy_find_device(struct device_node *phy_np); struct phy_device * of_phy_connect(struct net_device *dev, struct device_node *phy_np, @@ -74,6 +75,11 @@ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node * return mdiobus_register(mdio); } +static inline struct mdio_device *of_mdio_find_device(struct device_node *np) +{ + return NULL; +} + static inline struct phy_device *of_phy_find_device(struct device_node *phy_np) { return NULL; -- cgit v1.2.3 From 7ae10eb903d6ed9b3069fe1dc175b36d86667d09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 09:29:50 +0200 Subject: dma-mapping: remove DMA_MASK_NONE This value is only used by a PCMCIA driver and not very useful. Signed-off-by: Christoph Hellwig Acked-by: Dominik Brodowski --- include/linux/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index bb138ac6f5e6..e074588d753f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -138,8 +138,6 @@ extern const struct dma_map_ops dma_dummy_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -#define DMA_MASK_NONE 0x0ULL - static inline int valid_dma_direction(int dma_direction) { return ((dma_direction == DMA_BIDIRECTIONAL) || -- cgit v1.2.3 From db4268f8c575617bacdeff862c7de674dbf65075 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 09:31:32 +0200 Subject: dma-mapping: move valid_dma_direction to dma-direction.h Move the valid_dma_direction helper to a more suitable header, and clean it up to use the proper enum as well as removing pointless braces. Signed-off-by: Christoph Hellwig --- include/linux/dma-direction.h | 8 +++++++- include/linux/dma-mapping.h | 7 ------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direction.h b/include/linux/dma-direction.h index 9c96e30e6a0b..a2fe4571bc92 100644 --- a/include/linux/dma-direction.h +++ b/include/linux/dma-direction.h @@ -9,4 +9,10 @@ enum dma_data_direction { DMA_NONE = 3, }; -#endif +static inline int valid_dma_direction(enum dma_data_direction dir) +{ + return dir == DMA_BIDIRECTIONAL || dir == DMA_TO_DEVICE || + dir == DMA_FROM_DEVICE; +} + +#endif /* _LINUX_DMA_DIRECTION_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e074588d753f..51e93d44b826 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -138,13 +138,6 @@ extern const struct dma_map_ops dma_dummy_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -static inline int valid_dma_direction(int dma_direction) -{ - return ((dma_direction == DMA_BIDIRECTIONAL) || - (dma_direction == DMA_TO_DEVICE) || - (dma_direction == DMA_FROM_DEVICE)); -} - #ifdef CONFIG_DMA_DECLARE_COHERENT /* * These three functions are only for dma allocator. -- cgit v1.2.3 From eba304c6861613a649ba46cfab835b1eddeacd8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 09:33:42 +0200 Subject: dma-mapping: better document dma_addr_t and DMA_MAPPING_ERROR Move the comment documenting dma_addr_t away from the dma_map_ops definition which isn't very related to it, and toward DMA_MAPPING_ERROR, which is somewhat related. Add a little blurb about DMA_MAPPING_ERROR as well. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 51e93d44b826..943479fb77f6 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -67,12 +67,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * A dma_addr_t can hold any valid DMA or bus address for the platform. - * It can be given to a device to use as a DMA source or target. A CPU cannot - * reference a dma_addr_t directly because there may be translation between - * its physical address space and the bus address space. - */ struct dma_map_ops { void* (*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, @@ -131,6 +125,16 @@ struct dma_map_ops { unsigned long (*get_merge_boundary)(struct device *dev); }; +/* + * A dma_addr_t can hold any valid DMA or bus address for the platform. It can + * be given to a device to use as a DMA source or target. It is specific to a + * given device and there may be a translation between the CPU physical address + * space and the bus address space. + * + * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not + * be used directly in drivers, but checked for using dma_mapping_error() + * instead. + */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) extern const struct dma_map_ops dma_virt_ops; -- cgit v1.2.3 From 43ee5b6daa6c45246098493dab2c229d196c9cf6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 19:17:20 +0200 Subject: mm: turn alloc_pages into an inline function To prevent a compiler error when a method call alloc_pages is added (which I plan to for the dma_map_ops). Signed-off-by: Christoph Hellwig --- include/linux/gfp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 67a0774e080b..dd2577c54071 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -550,8 +550,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else -#define alloc_pages(gfp_mask, order) \ - alloc_pages_node(numa_node_id(), gfp_mask, order) +static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages_node(numa_node_id(), gfp_mask, order); +} #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ -- cgit v1.2.3 From 0d71675f87dc406d4c284729b8d36be050ad0d15 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:31:30 +0200 Subject: dma-mapping: add a new dma_alloc_noncoherent API Add a new API to allocate and free memory that is guaranteed to be addressable by a device, but which potentially is not cache coherent for DMA. To transfer ownership to and from the device, the existing streaming DMA API calls dma_sync_single_for_device and dma_sync_single_for_cpu must be used. For now the new calls are implemented on top of dma_alloc_attrs just like the old-noncoherent API, but once all drivers are switched to the new API it will be replaced with a better working implementation that is available on all architectures. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 943479fb77f6..cb607b381adf 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -384,6 +384,18 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) } #endif /* CONFIG_HAS_DMA */ +static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + return dma_alloc_attrs(dev, size, dma_handle, gfp, + DMA_ATTR_NON_CONSISTENT); +} +static inline void dma_free_noncoherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir) +{ + dma_free_attrs(dev, size, vaddr, dma_handle, DMA_ATTR_NON_CONSISTENT); +} + static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { -- cgit v1.2.3 From 5a84292271402cffe0717bc58e2ad9a0c7977272 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:28:13 +0200 Subject: dma-mapping: remove dma_cache_sync All users are gone now, remove the API. Signed-off-by: Christoph Hellwig Acked-by: Thomas Bogendoerfer (MIPS part) --- include/linux/dma-mapping.h | 8 -------- include/linux/dma-noncoherent.h | 10 ---------- 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index cb607b381adf..233bb8dcbe02 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -117,8 +117,6 @@ struct dma_map_ops { void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir); - void (*cache_sync)(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction); int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); size_t (*max_mapping_size)(struct device *dev); @@ -249,8 +247,6 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); -void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir); int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); @@ -334,10 +330,6 @@ static inline void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { } -static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ -} static inline int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index b9bc6c557ea4..0888656369a4 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -62,16 +62,6 @@ static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, } #endif /* CONFIG_MMU */ -#ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC -void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction); -#else -static inline void arch_dma_cache_sync(struct device *dev, void *vaddr, - size_t size, enum dma_data_direction direction) -{ -} -#endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */ - #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From efa70f2fdc842e63a0a13223e0e83cedcc2117f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:34:33 +0200 Subject: dma-mapping: add a new dma_alloc_pages API This API is the equivalent of alloc_pages, except that the returned memory is guaranteed to be DMA addressable by the passed in device. The implementation will also be used to provide a more sensible replacement for DMA_ATTR_NON_CONSISTENT flag. Additionally dma_alloc_noncoherent is switched over to use dma_alloc_pages as its backend. Signed-off-by: Christoph Hellwig Acked-by: Thomas Bogendoerfer (MIPS part) --- include/linux/dma-direct.h | 5 +++++ include/linux/dma-mapping.h | 34 +++++++++++++++++----------------- include/linux/dma-noncoherent.h | 3 --- 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 83f797e0cb78..38ed3b55034d 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -115,6 +115,11 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +struct page *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_direct_free_pages(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_addr, + enum dma_data_direction dir); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 233bb8dcbe02..4b9b1d64f5ec 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -27,11 +27,6 @@ * buffered to improve performance. */ #define DMA_ATTR_WRITE_COMBINE (1UL << 2) -/* - * DMA_ATTR_NON_CONSISTENT: Lets the platform to choose to return either - * consistent or non-consistent memory as it sees fit. - */ -#define DMA_ATTR_NON_CONSISTENT (1UL << 3) /* * DMA_ATTR_NO_KERNEL_MAPPING: Lets the platform to avoid creating a kernel * virtual mapping for the allocated buffer. @@ -74,6 +69,11 @@ struct dma_map_ops { void (*free)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); + struct page *(*alloc_pages)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); @@ -376,17 +376,14 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) } #endif /* CONFIG_HAS_DMA */ -static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) -{ - return dma_alloc_attrs(dev, size, dma_handle, gfp, - DMA_ATTR_NON_CONSISTENT); -} -static inline void dma_free_noncoherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir) -{ - dma_free_attrs(dev, size, vaddr, dma_handle, DMA_ATTR_NON_CONSISTENT); -} +struct page *dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir); +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -512,7 +509,10 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); - +struct page *dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_common_free_pages(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); struct page **dma_common_find_pages(void *cpu_addr); void *dma_common_contiguous_remap(struct page *page, size_t size, pgprot_t prot, const void *caller); diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 0888656369a4..e61283e06576 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -31,9 +31,6 @@ static __always_inline bool dma_alloc_need_uncached(struct device *dev, return false; if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) return false; - if (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && - (attrs & DMA_ATTR_NON_CONSISTENT)) - return false; return true; } -- cgit v1.2.3 From de7cf917768f438aae6d2f4e9bced3739f15f5b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:59:45 +0200 Subject: dma-mapping: add new {alloc,free}_noncoherent dma_map_ops methods This will allow IOMMU drivers to allocate non-contigous memory and return a vmapped virtual address. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4b9b1d64f5ec..7c77cd6f3604 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -74,6 +74,11 @@ struct dma_map_ops { gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir); + void* (*alloc_noncoherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); -- cgit v1.2.3 From a8ea8bdd9df92a0e5db5b43900abb7a288b8a53e Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 21 Sep 2020 00:20:55 +0800 Subject: lib/mpi: Extend the MPI library Expand the mpi library based on libgcrypt, and the ECC algorithm of mpi based on libgcrypt requires these functions. Some other algorithms will be developed based on mpi ecc, such as SM2. Signed-off-by: Tianjia Zhang Tested-by: Xufeng Zhang Signed-off-by: Herbert Xu --- include/linux/mpi.h | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 5d906dfbf3ed..3c9e41603cf6 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -40,21 +40,79 @@ struct gcry_mpi { typedef struct gcry_mpi *MPI; #define mpi_get_nlimbs(a) ((a)->nlimbs) +#define mpi_has_sign(a) ((a)->sign) /*-- mpiutil.c --*/ MPI mpi_alloc(unsigned nlimbs); +void mpi_clear(MPI a); void mpi_free(MPI a); int mpi_resize(MPI a, unsigned nlimbs); +static inline MPI mpi_new(unsigned int nbits) +{ + return mpi_alloc((nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB); +} + +MPI mpi_copy(MPI a); +MPI mpi_alloc_like(MPI a); +void mpi_snatch(MPI w, MPI u); +MPI mpi_set(MPI w, MPI u); +MPI mpi_set_ui(MPI w, unsigned long u); +MPI mpi_alloc_set_ui(unsigned long u); +void mpi_swap_cond(MPI a, MPI b, unsigned long swap); + +/* Constants used to return constant MPIs. See mpi_init if you + * want to add more constants. + */ +#define MPI_NUMBER_OF_CONSTANTS 6 +enum gcry_mpi_constants { + MPI_C_ZERO, + MPI_C_ONE, + MPI_C_TWO, + MPI_C_THREE, + MPI_C_FOUR, + MPI_C_EIGHT +}; + +MPI mpi_const(enum gcry_mpi_constants no); + /*-- mpicoder.c --*/ + +/* Different formats of external big integer representation. */ +enum gcry_mpi_format { + GCRYMPI_FMT_NONE = 0, + GCRYMPI_FMT_STD = 1, /* Twos complement stored without length. */ + GCRYMPI_FMT_PGP = 2, /* As used by OpenPGP (unsigned only). */ + GCRYMPI_FMT_SSH = 3, /* As used by SSH (like STD but with length). */ + GCRYMPI_FMT_HEX = 4, /* Hex format. */ + GCRYMPI_FMT_USG = 5, /* Like STD but unsigned. */ + GCRYMPI_FMT_OPAQUE = 8 /* Opaque format (some functions only). */ +}; + MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); +int mpi_fromstr(MPI val, const char *str); +MPI mpi_scanval(const char *string); MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, int *sign); +int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, + size_t buflen, size_t *nwritten, MPI a); + +/*-- mpi-mod.c --*/ +void mpi_mod(MPI rem, MPI dividend, MPI divisor); + +/* Context used with Barrett reduction. */ +struct barrett_ctx_s; +typedef struct barrett_ctx_s *mpi_barrett_t; + +mpi_barrett_t mpi_barrett_init(MPI m, int copy); +void mpi_barrett_free(mpi_barrett_t ctx); +void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx); +void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx); /*-- mpi-pow.c --*/ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); @@ -62,6 +120,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); /*-- mpi-cmp.c --*/ int mpi_cmp_ui(MPI u, ulong v); int mpi_cmp(MPI u, MPI v); +int mpi_cmpabs(MPI u, MPI v); /*-- mpi-sub-ui.c --*/ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); @@ -69,6 +128,34 @@ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); /*-- mpi-bit.c --*/ void mpi_normalize(MPI a); unsigned mpi_get_nbits(MPI a); +int mpi_test_bit(MPI a, unsigned int n); +void mpi_set_bit(MPI a, unsigned int n); +void mpi_set_highbit(MPI a, unsigned int n); +void mpi_clear_highbit(MPI a, unsigned int n); +void mpi_clear_bit(MPI a, unsigned int n); +void mpi_rshift_limbs(MPI a, unsigned int count); +void mpi_rshift(MPI x, MPI a, unsigned int n); +void mpi_lshift_limbs(MPI a, unsigned int count); +void mpi_lshift(MPI x, MPI a, unsigned int n); + +/*-- mpi-add.c --*/ +void mpi_add_ui(MPI w, MPI u, unsigned long v); +void mpi_add(MPI w, MPI u, MPI v); +void mpi_sub(MPI w, MPI u, MPI v); +void mpi_addm(MPI w, MPI u, MPI v, MPI m); +void mpi_subm(MPI w, MPI u, MPI v, MPI m); + +/*-- mpi-mul.c --*/ +void mpi_mul(MPI w, MPI u, MPI v); +void mpi_mulm(MPI w, MPI u, MPI v, MPI m); + +/*-- mpi-div.c --*/ +void mpi_tdiv_r(MPI rem, MPI num, MPI den); +void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); +void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); + +/*-- mpi-inv.c --*/ +int mpi_invm(MPI x, MPI a, MPI n); /* inline functions */ -- cgit v1.2.3 From d58bb7e55a8a65894cc02f27c3e2bf9403e7c40f Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 21 Sep 2020 00:20:56 +0800 Subject: lib/mpi: Introduce ec implementation to MPI library The implementation of EC is introduced from libgcrypt as the basic algorithm of elliptic curve, which can be more perfectly integrated with MPI implementation. Some other algorithms will be developed based on mpi ecc, such as SM2. Signed-off-by: Tianjia Zhang Tested-by: Xufeng Zhang Signed-off-by: Herbert Xu --- include/linux/mpi.h | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 3c9e41603cf6..3e5358f4de2f 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -157,6 +157,111 @@ void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); /*-- mpi-inv.c --*/ int mpi_invm(MPI x, MPI a, MPI n); +/*-- ec.c --*/ + +/* Object to represent a point in projective coordinates */ +struct gcry_mpi_point { + MPI x; + MPI y; + MPI z; +}; + +typedef struct gcry_mpi_point *MPI_POINT; + +/* Models describing an elliptic curve */ +enum gcry_mpi_ec_models { + /* The Short Weierstrass equation is + * y^2 = x^3 + ax + b + */ + MPI_EC_WEIERSTRASS = 0, + /* The Montgomery equation is + * by^2 = x^3 + ax^2 + x + */ + MPI_EC_MONTGOMERY, + /* The Twisted Edwards equation is + * ax^2 + y^2 = 1 + bx^2y^2 + * Note that we use 'b' instead of the commonly used 'd'. + */ + MPI_EC_EDWARDS +}; + +/* Dialects used with elliptic curves */ +enum ecc_dialects { + ECC_DIALECT_STANDARD = 0, + ECC_DIALECT_ED25519, + ECC_DIALECT_SAFECURVE +}; + +/* This context is used with all our EC functions. */ +struct mpi_ec_ctx { + enum gcry_mpi_ec_models model; /* The model describing this curve. */ + enum ecc_dialects dialect; /* The ECC dialect used with the curve. */ + int flags; /* Public key flags (not always used). */ + unsigned int nbits; /* Number of bits. */ + + /* Domain parameters. Note that they may not all be set and if set + * the MPIs may be flaged as constant. + */ + MPI p; /* Prime specifying the field GF(p). */ + MPI a; /* First coefficient of the Weierstrass equation. */ + MPI b; /* Second coefficient of the Weierstrass equation. */ + MPI_POINT G; /* Base point (generator). */ + MPI n; /* Order of G. */ + unsigned int h; /* Cofactor. */ + + /* The actual key. May not be set. */ + MPI_POINT Q; /* Public key. */ + MPI d; /* Private key. */ + + const char *name; /* Name of the curve. */ + + /* This structure is private to mpi/ec.c! */ + struct { + struct { + unsigned int a_is_pminus3:1; + unsigned int two_inv_p:1; + } valid; /* Flags to help setting the helper vars below. */ + + int a_is_pminus3; /* True if A = P - 3. */ + + MPI two_inv_p; + + mpi_barrett_t p_barrett; + + /* Scratch variables. */ + MPI scratch[11]; + + /* Helper for fast reduction. */ + /* int nist_nbits; /\* If this is a NIST curve, the # of bits. *\/ */ + /* MPI s[10]; */ + /* MPI c; */ + } t; + + /* Curve specific computation routines for the field. */ + void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec); + void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx); + void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx); +}; + +void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, + enum ecc_dialects dialect, + int flags, MPI p, MPI a, MPI b); +void mpi_ec_deinit(struct mpi_ec_ctx *ctx); +MPI_POINT mpi_point_new(unsigned int nbits); +void mpi_point_release(MPI_POINT p); +void mpi_point_init(MPI_POINT p); +void mpi_point_free_parts(MPI_POINT p); +int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx); +void mpi_ec_add_points(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx); +void mpi_ec_mul_point(MPI_POINT result, + MPI scalar, MPI_POINT point, + struct mpi_ec_ctx *ctx); +int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx); + /* inline functions */ /** -- cgit v1.2.3 From 254f84f559039b6d6f6e1035fd7645b42671ab48 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 21 Sep 2020 00:21:01 +0800 Subject: X.509: support OSCCA certificate parse The digital certificate format based on SM2 crypto algorithm as specified in GM/T 0015-2012. It was published by State Encryption Management Bureau, China. This patch adds the OID object identifier defined by OSCCA. The x509 certificate supports SM2-with-SM3 type certificate parsing. It uses the standard elliptic curve public key, and the sm2 algorithm signs the hash generated by sm3. Signed-off-by: Tianjia Zhang Tested-by: Xufeng Zhang Reviewed-by: Vitaly Chikunov Signed-off-by: Herbert Xu --- include/linux/oid_registry.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 657d6bf2c064..4462ed2c18cd 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -107,6 +107,12 @@ enum OID { OID_gostTC26Sign512B, /* 1.2.643.7.1.2.1.2.2 */ OID_gostTC26Sign512C, /* 1.2.643.7.1.2.1.2.3 */ + /* OSCCA */ + OID_sm2, /* 1.2.156.10197.1.301 */ + OID_sm3, /* 1.2.156.10197.1.401 */ + OID_SM2_with_SM3, /* 1.2.156.10197.1.501 */ + OID_sm3WithRSAEncryption, /* 1.2.156.10197.1.504 */ + OID__NR }; -- cgit v1.2.3 From 749626fa099085ca77f3ff05a8efcb3b87721724 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 1 Sep 2020 14:47:16 +0800 Subject: ACPI: Remove three unused inline functions There is no callers in tree. Signed-off-by: YueHaibing [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1e4cdc6c7ae2..7ce2235f99f9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -977,8 +977,6 @@ int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); #else -static inline int acpi_dev_runtime_suspend(struct device *dev) { return 0; } -static inline int acpi_dev_runtime_resume(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) @@ -1216,13 +1214,6 @@ static inline int acpi_node_prop_get(const struct fwnode_handle *fwnode, return -ENXIO; } -static inline int acpi_dev_prop_get(const struct acpi_device *adev, - const char *propname, - void **valptr) -{ - return -ENXIO; -} - static inline int acpi_dev_prop_read_single(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, -- cgit v1.2.3 From 2a36ab717e8fe678d98f81c14a0b124712719840 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 23 Sep 2020 16:36:16 -0700 Subject: rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ This patchset is based on Google-internal RSEQ work done by Paul Turner and Andrew Hunter. When working with per-CPU RSEQ-based memory allocations, it is sometimes important to make sure that a global memory location is no longer accessed from RSEQ critical sections. For example, there can be two per-CPU lists, one is "active" and accessed per-CPU, while another one is inactive and worked on asynchronously "off CPU" (e.g. garbage collection is performed). Then at some point the two lists are swapped, and a fast RCU-like mechanism is required to make sure that the previously active list is no longer accessed. This patch introduces such a mechanism: in short, membarrier() syscall issues an IPI to a CPU, restarting a potentially active RSEQ critical section on the CPU. Signed-off-by: Peter Oskolkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mathieu Desnoyers Link: https://lkml.kernel.org/r/20200923233618.2572849-1-posk@google.com --- include/linux/sched/mm.h | 3 +++ include/linux/syscalls.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index f889e332912f..15bfb06f2884 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -348,10 +348,13 @@ enum { MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5), + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY = (1U << 6), + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ = (1U << 7), }; enum { MEMBARRIER_FLAG_SYNC_CORE = (1U << 0), + MEMBARRIER_FLAG_RSEQ = (1U << 1), }; #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 75ac7f8ae93c..06db09875aa4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -974,7 +974,7 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); asmlinkage long sys_userfaultfd(int flags); -asmlinkage long sys_membarrier(int cmd, int flags); +asmlinkage long sys_membarrier(int cmd, unsigned int flags, int cpu_id); asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, -- cgit v1.2.3 From fa01b1e9733fd59ecb8b5b6d85dfb481d2025fbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:40:57 +0200 Subject: block: add a bdev_is_partition helper Add a littler helper to make the somewhat arcane bd_contains checks a little more obvious. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8e77f12de522..33f283885ba5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1353,6 +1353,11 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, extern int blk_verify_command(unsigned char *cmd, fmode_t mode); +static inline bool bdev_is_partition(struct block_device *bdev) +{ + return bdev->bd_partno; +} + enum blk_default_limits { BLK_MAX_SEGMENTS = 128, BLK_SAFE_MAX_SECTORS = 255, @@ -1469,7 +1474,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) if (q->limits.misaligned) return -1; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, bdev->bd_part->start_sect); return q->limits.alignment_offset; @@ -1510,7 +1515,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, bdev->bd_part->start_sect); return q->limits.discard_alignment; -- cgit v1.2.3 From 021a24460dc28e7412aecfae89f60e1847e685c0 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Sep 2020 16:06:51 -0400 Subject: block: add QUEUE_FLAG_NOWAIT Add QUEUE_FLAG_NOWAIT to allow a block device to advertise support for REQ_NOWAIT. Bio-based devices may set QUEUE_FLAG_NOWAIT where applicable. Update QUEUE_FLAG_MQ_DEFAULT to include QUEUE_FLAG_NOWAIT. Also update submit_bio_checks() to verify it is set for REQ_NOWAIT bios. Reported-by: Konstantin Khlebnikov Suggested-by: Christoph Hellwig Signed-off-by: Mike Snitzer Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 33f283885ba5..d5a3e1a4c2f7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -619,10 +619,12 @@ struct request_queue { #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ -#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ +#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ +#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_SAME_COMP)) + (1 << QUEUE_FLAG_SAME_COMP) | \ + (1 << QUEUE_FLAG_NOWAIT)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); @@ -664,6 +666,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) +#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -- cgit v1.2.3 From 6abc49468eeaad2dd481779906aaa29605139087 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 23 Sep 2020 16:06:52 -0400 Subject: dm: add support for REQ_NOWAIT and enable it for linear target Add DM target feature flag DM_TARGET_NOWAIT which advertises that target works with REQ_NOWAIT bios. Add dm_table_supports_nowait() and update dm_table_set_restrictions() to set/clear QUEUE_FLAG_NOWAIT accordingly. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 93096e524e43..d6f8d4ba8d48 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -252,6 +252,12 @@ struct target_type { #define DM_TARGET_ZONED_HM 0x00000040 #define dm_target_supports_zoned_hm(type) ((type)->features & DM_TARGET_ZONED_HM) +/* + * A target handles REQ_NOWAIT + */ +#define DM_TARGET_NOWAIT 0x00000080 +#define dm_target_supports_nowait(type) ((type)->features & DM_TARGET_NOWAIT) + struct dm_target { struct dm_table *table; struct target_type *type; -- cgit v1.2.3 From 34d8f7a4627ca59ba915c7ea44fe9d9123875712 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 21 Sep 2020 14:13:35 +0800 Subject: iopoll: update kerneldoc of read_poll_timeout_atomic() Arguments description of read_poll_timeout_atomic() is out of date, update it. Cc: Alan Stern Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/1600668815-12135-11-git-send-email-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/iopoll.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index bc89ac625f26..2c8860e406bd 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -60,8 +60,7 @@ /** * read_poll_timeout_atomic - Periodically poll an address until a condition is * met or a timeout occurs - * @op: accessor function (takes @addr as its only argument) - * @addr: Address to poll + * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) * @delay_us: Time to udelay between reads in us (0 tight-loops). Should @@ -69,6 +68,7 @@ * Documentation/timers/timers-howto.rst). * @timeout_us: Timeout in us, 0 means never timeout * @delay_before_read: if it is true, delay @delay_us before read. + * @args: arguments for @op poll * * Returns 0 on success and -ETIMEDOUT upon a timeout. In either * case, the last read value at @args is stored in @val. -- cgit v1.2.3 From ddd1198e3e0935066d6e309180d49f64ef4fa702 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 23 Sep 2020 15:43:42 +0200 Subject: USB: correct API of usb_control_msg_send/recv They need to specify how memory is to be allocated, as control messages need to work in contexts that require GFP_NOIO. Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20200923134348.23862-9-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index a5460f08126e..7d72c4e0713c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1804,10 +1804,12 @@ extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, /* wrappers around usb_control_msg() for the most common standard requests */ int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, - const void *data, __u16 size, int timeout); + const void *data, __u16 size, int timeout, + gfp_t memflags); int usb_control_msg_recv(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, - void *data, __u16 size, int timeout); + void *data, __u16 size, int timeout, + gfp_t memflags); extern int usb_get_descriptor(struct usb_device *dev, unsigned char desctype, unsigned char descindex, void *buf, int size); extern int usb_get_status(struct usb_device *dev, -- cgit v1.2.3 From 5f6e560c2dd5be3ae446f20ae97263cbfa309630 Mon Sep 17 00:00:00 2001 From: Dennis YC Hsieh Date: Tue, 7 Jul 2020 23:45:07 +0800 Subject: soc: mediatek: cmdq: add write_s function add write_s function in cmdq helper functions which writes value contains in internal register to address with large dma access support. Signed-off-by: Dennis YC Hsieh Link: https://lore.kernel.org/r/1594136714-11650-3-git-send-email-dennis-yc.hsieh@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/mailbox/mtk-cmdq-mailbox.h | 1 + include/linux/soc/mediatek/mtk-cmdq.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index 05eea1aef5aa..1f76cfedb16d 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -60,6 +60,7 @@ enum cmdq_code { CMDQ_CODE_JUMP = 0x10, CMDQ_CODE_WFE = 0x20, CMDQ_CODE_EOC = 0x40, + CMDQ_CODE_WRITE_S = 0x90, CMDQ_CODE_LOGIC = 0xa0, }; diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 2249ecaf77e4..9b0c57a0063d 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -12,6 +12,8 @@ #include #define CMDQ_NO_TIMEOUT 0xffffffffu +#define CMDQ_ADDR_HIGH(addr) ((u32)(((addr) >> 16) & GENMASK(31, 0))) +#define CMDQ_ADDR_LOW(addr) ((u16)(addr) | BIT(1)) struct cmdq_pkt; @@ -102,6 +104,23 @@ int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value); int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value, u32 mask); +/** + * cmdq_pkt_write_s() - append write_s command to the CMDQ packet + * @pkt: the CMDQ packet + * @high_addr_reg_idx: internal register ID which contains high address of pa + * @addr_low: low address of pa + * @src_reg_idx: the CMDQ internal register ID which cache source value + * + * Return: 0 for success; else the error code is returned + * + * Support write value to physical address without subsys. Use CMDQ_ADDR_HIGH() + * to get high address and call cmdq_pkt_assign() to assign value into internal + * reg. Also use CMDQ_ADDR_LOW() to get low address for addr_low parameter when + * call to this function. + */ +int cmdq_pkt_write_s(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, + u16 addr_low, u16 src_reg_idx); + /** * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet * @pkt: the CMDQ packet -- cgit v1.2.3 From 11c7842d41c82eb3551a0606ccba89ac33318b62 Mon Sep 17 00:00:00 2001 From: Dennis YC Hsieh Date: Tue, 7 Jul 2020 23:45:08 +0800 Subject: soc: mediatek: cmdq: add write_s_mask function add write_s_mask function in cmdq helper functions which writes value contains in internal register to address with mask and large dma access support. Signed-off-by: Dennis YC Hsieh Link: https://lore.kernel.org/r/1594136714-11650-4-git-send-email-dennis-yc.hsieh@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/mailbox/mtk-cmdq-mailbox.h | 1 + include/linux/soc/mediatek/mtk-cmdq.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index 1f76cfedb16d..90d1d8e64412 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -61,6 +61,7 @@ enum cmdq_code { CMDQ_CODE_WFE = 0x20, CMDQ_CODE_EOC = 0x40, CMDQ_CODE_WRITE_S = 0x90, + CMDQ_CODE_WRITE_S_MASK = 0x91, CMDQ_CODE_LOGIC = 0xa0, }; diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 9b0c57a0063d..53230341bf94 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -121,6 +121,24 @@ int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys, int cmdq_pkt_write_s(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, u16 addr_low, u16 src_reg_idx); +/** + * cmdq_pkt_write_s_mask() - append write_s with mask command to the CMDQ packet + * @pkt: the CMDQ packet + * @high_addr_reg_idx: internal register ID which contains high address of pa + * @addr_low: low address of pa + * @src_reg_idx: the CMDQ internal register ID which cache source value + * @mask: the specified target address mask, use U32_MAX if no need + * + * Return: 0 for success; else the error code is returned + * + * Support write value to physical address without subsys. Use CMDQ_ADDR_HIGH() + * to get high address and call cmdq_pkt_assign() to assign value into internal + * reg. Also use CMDQ_ADDR_LOW() to get low address for addr_low parameter when + * call to this function. + */ +int cmdq_pkt_write_s_mask(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, + u16 addr_low, u16 src_reg_idx, u32 mask); + /** * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet * @pkt: the CMDQ packet -- cgit v1.2.3 From d3b04aab06fbc33ddea15725f3ff1667c9717929 Mon Sep 17 00:00:00 2001 From: Dennis YC Hsieh Date: Tue, 7 Jul 2020 23:45:09 +0800 Subject: soc: mediatek: cmdq: add read_s function Add read_s function in cmdq helper functions which support read value from register or dma physical address into gce internal register. Signed-off-by: Dennis YC Hsieh Link: https://lore.kernel.org/r/1594136714-11650-5-git-send-email-dennis-yc.hsieh@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/mailbox/mtk-cmdq-mailbox.h | 1 + include/linux/soc/mediatek/mtk-cmdq.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index 90d1d8e64412..efbd8a9eb2d1 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -60,6 +60,7 @@ enum cmdq_code { CMDQ_CODE_JUMP = 0x10, CMDQ_CODE_WFE = 0x20, CMDQ_CODE_EOC = 0x40, + CMDQ_CODE_READ_S = 0x80, CMDQ_CODE_WRITE_S = 0x90, CMDQ_CODE_WRITE_S_MASK = 0x91, CMDQ_CODE_LOGIC = 0xa0, diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 53230341bf94..cd7ec714344e 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -104,6 +104,18 @@ int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value); int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value, u32 mask); +/* + * cmdq_pkt_read_s() - append read_s command to the CMDQ packet + * @pkt: the CMDQ packet + * @high_addr_reg_idx: internal register ID which contains high address of pa + * @addr_low: low address of pa + * @reg_idx: the CMDQ internal register ID to cache read data + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_read_s(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, u16 addr_low, + u16 reg_idx); + /** * cmdq_pkt_write_s() - append write_s command to the CMDQ packet * @pkt: the CMDQ packet -- cgit v1.2.3 From 1af43fce813ebd74c76d080beb261603bd0853e1 Mon Sep 17 00:00:00 2001 From: Dennis YC Hsieh Date: Tue, 7 Jul 2020 23:45:10 +0800 Subject: soc: mediatek: cmdq: add write_s value function add write_s function in cmdq helper functions which writes a constant value to address with large dma access support. Signed-off-by: Dennis YC Hsieh Link: https://lore.kernel.org/r/1594136714-11650-6-git-send-email-dennis-yc.hsieh@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-cmdq.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index cd7ec714344e..ae73e10da274 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -151,6 +151,19 @@ int cmdq_pkt_write_s(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, int cmdq_pkt_write_s_mask(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, u16 addr_low, u16 src_reg_idx, u32 mask); +/** + * cmdq_pkt_write_s_value() - append write_s command to the CMDQ packet which + * write value to a physical address + * @pkt: the CMDQ packet + * @high_addr_reg_idx: internal register ID which contains high address of pa + * @addr_low: low address of pa + * @value: the specified target value + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write_s_value(struct cmdq_pkt *pkt, u8 high_addr_reg_idx, + u16 addr_low, u32 value); + /** * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet * @pkt: the CMDQ packet -- cgit v1.2.3 From 88a2ffc48d5bc85119ef7961df12369dcd53b4d2 Mon Sep 17 00:00:00 2001 From: Dennis YC Hsieh Date: Tue, 7 Jul 2020 23:45:11 +0800 Subject: soc: mediatek: cmdq: add write_s_mask value function add write_s_mask_value function in cmdq helper functions which writes a constant value to address with mask and large dma access support. Signed-off-by: Dennis YC Hsieh Link: https://lore.kernel.org/r/1594136714-11650-7-git-send-email-dennis-yc.hsieh@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-cmdq.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index ae73e10da274..d9390d76ee14 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -164,6 +164,21 @@ int cmdq_pkt_write_s_mask(struct cmdq_pkt *pkt, u16 high_addr_reg_idx, int cmdq_pkt_write_s_value(struct cmdq_pkt *pkt, u8 high_addr_reg_idx, u16 addr_low, u32 value); +/** + * cmdq_pkt_write_s_mask_value() - append write_s command with mask to the CMDQ + * packet which write value to a physical + * address + * @pkt: the CMDQ packet + * @high_addr_reg_idx: internal register ID which contains high address of pa + * @addr_low: low address of pa + * @value: the specified target value + * @mask: the specified target mask + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_write_s_mask_value(struct cmdq_pkt *pkt, u8 high_addr_reg_idx, + u16 addr_low, u32 value, u32 mask); + /** * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet * @pkt: the CMDQ packet -- cgit v1.2.3 From 946f1792d3d7942acfbc6afa9a733f608f4622d6 Mon Sep 17 00:00:00 2001 From: Dennis YC Hsieh Date: Tue, 7 Jul 2020 23:45:12 +0800 Subject: soc: mediatek: cmdq: add jump function Add jump function so that client can jump to any address which contains instruction. Signed-off-by: Dennis YC Hsieh Link: https://lore.kernel.org/r/1594136714-11650-8-git-send-email-dennis-yc.hsieh@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-cmdq.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index d9390d76ee14..34354e952f60 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -252,6 +252,17 @@ int cmdq_pkt_poll_mask(struct cmdq_pkt *pkt, u8 subsys, */ int cmdq_pkt_assign(struct cmdq_pkt *pkt, u16 reg_idx, u32 value); +/** + * cmdq_pkt_jump() - Append jump command to the CMDQ packet, ask GCE + * to execute an instruction that change current thread PC to + * a physical address which should contains more instruction. + * @pkt: the CMDQ packet + * @addr: physical address of target instruction buffer + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_jump(struct cmdq_pkt *pkt, dma_addr_t addr); + /** * cmdq_pkt_finalize() - Append EOC and jump command to pkt. * @pkt: the CMDQ packet -- cgit v1.2.3 From 23c22299cd290409c6b78f57c42b64f8dfb6dd92 Mon Sep 17 00:00:00 2001 From: Dennis YC Hsieh Date: Tue, 7 Jul 2020 23:45:13 +0800 Subject: soc: mediatek: cmdq: add clear option in cmdq_pkt_wfe api Add clear parameter to let client decide if event should be clear to 0 after GCE receive it. Signed-off-by: Dennis YC Hsieh Acked-by: Chun-Kuang Hu Link: https://lore.kernel.org/r/1594136714-11650-9-git-send-email-dennis-yc.hsieh@mediatek.com [mb: fix commit message] Signed-off-by: Matthias Brugger --- include/linux/mailbox/mtk-cmdq-mailbox.h | 3 +-- include/linux/soc/mediatek/mtk-cmdq.h | 5 +++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index efbd8a9eb2d1..d5a983d65f05 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -28,8 +28,7 @@ * bit 16-27: update value * bit 31: 1 - update, 0 - no update */ -#define CMDQ_WFE_OPTION (CMDQ_WFE_UPDATE | CMDQ_WFE_WAIT | \ - CMDQ_WFE_WAIT_VALUE) +#define CMDQ_WFE_OPTION (CMDQ_WFE_WAIT | CMDQ_WFE_WAIT_VALUE) /** cmdq event maximum */ #define CMDQ_MAX_EVENT 0x3ff diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 34354e952f60..960704d75994 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -182,11 +182,12 @@ int cmdq_pkt_write_s_mask_value(struct cmdq_pkt *pkt, u8 high_addr_reg_idx, /** * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet * @pkt: the CMDQ packet - * @event: the desired event type to "wait and CLEAR" + * @event: the desired event type to wait + * @clear: clear event or not after event arrive * * Return: 0 for success; else the error code is returned */ -int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event); +int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event, bool clear); /** * cmdq_pkt_clear_event() - append clear event command to the CMDQ packet -- cgit v1.2.3 From ea0c80d1764449acf2f70fdb25aec33800cd0348 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 24 Sep 2020 15:17:49 +0800 Subject: genirq: Add stub for set_handle_irq() when !GENERIC_IRQ_MULTI_HANDLER In order to avoid compilation errors when a driver references set_handle_irq(), but that the architecture doesn't select GENERIC_IRQ_MULTI_HANDLER, add a stub function that will just WARN_ON_ONCE() if ever used. Signed-off-by: Zhen Lei [maz: commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200924071754.4509-2-thunder.leizhen@huawei.com --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1b7f4dfee35b..b167baef88c0 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1252,6 +1252,12 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); * top-level IRQ handler. */ extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; +#else +#define set_handle_irq(handle_irq) \ + do { \ + (void)handle_irq; \ + WARN_ON(1); \ + } while (0) #endif #endif /* _LINUX_IRQ_H */ -- cgit v1.2.3 From 576d0703a81217f04e3a29ce3403231062ef2877 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:38 +0200 Subject: compat.h: fix a spelling error in There is no compat_sys_readv64v2 syscall, only a compat_sys_preadv64v2 one. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index d38c4d7e83bd..790be5ffc12c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -812,7 +812,7 @@ asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, const struct compat_iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 -asmlinkage long compat_sys_readv64v2(unsigned long fd, +asmlinkage long compat_sys_preadv64v2(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, loff_t pos, rwf_t flags); #endif -- cgit v1.2.3 From 1df8f55a37bd286a3d40192980050bc3d7d78887 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:03:50 -0700 Subject: bpf: Enable bpf_skc_to_* sock casting helper to networking prog type There is a constant need to add more fields into the bpf_tcp_sock for the bpf programs running at tc, sock_ops...etc. A current workaround could be to use bpf_probe_read_kernel(). However, other than making another helper call for reading each field and missing CO-RE, it is also not as intuitive to use as directly reading "tp->lsndtime" for example. While already having perfmon cap to do bpf_probe_read_kernel(), it will be much easier if the bpf prog can directly read from the tcp_sock. This patch tries to do that by using the existing casting-helpers bpf_skc_to_*() whose func_proto returns a btf_id. For example, the func_proto of bpf_skc_to_tcp_sock returns the btf_id of the kernel "struct tcp_sock". These helpers are also added to is_ptr_cast_function(). It ensures the returning reg (BPF_REF_0) will also carries the ref_obj_id. That will keep the ref-tracking works properly. The bpf_skc_to_* helpers are made available to most of the bpf prog types in filter.c. The bpf_skc_to_* helpers will be limited by perfmon cap. This patch adds a ARG_PTR_TO_BTF_ID_SOCK_COMMON. The helper accepting this arg can accept a btf-id-ptr (PTR_TO_BTF_ID + &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON]) or a legacy-ctx-convert-skc-ptr (PTR_TO_SOCK_COMMON). The bpf_skc_to_*() helpers are changed to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will accept pointer obtained from skb->sk. Instead of specifying both arg_type and arg_btf_id in the same func_proto which is how the current ARG_PTR_TO_BTF_ID does, the arg_btf_id of the new ARG_PTR_TO_BTF_ID_SOCK_COMMON is specified in the compatible_reg_types[] in verifier.c. The reason is the arg_btf_id is always the same. Discussion in this thread: https://lore.kernel.org/bpf/20200922070422.1917351-1-kafai@fb.com/ The ARG_PTR_TO_BTF_ID_ part gives a clear expectation that the helper is expecting a PTR_TO_BTF_ID which could be NULL. This is the same behavior as the existing helper taking ARG_PTR_TO_BTF_ID. The _SOCK_COMMON part means the helper is also expecting the legacy SOCK_COMMON pointer. By excluding the _OR_NULL part, the bpf prog cannot call helper with a literal NULL which doesn't make sense in most cases. e.g. bpf_skc_to_tcp_sock(NULL) will be rejected. All PTR_TO_*_OR_NULL reg has to do a NULL check first before passing into the helper or else the bpf prog will be rejected. This behavior is nothing new and consistent with the current expectation during bpf-prog-load. [ ARG_PTR_TO_BTF_ID_SOCK_COMMON will be used to replace ARG_PTR_TO_SOCK* of other existing helpers later such that those existing helpers can take the PTR_TO_BTF_ID returned by the bpf_skc_to_*() helpers. The only special case is bpf_sk_lookup_assign() which can accept a literal NULL ptr. It has to be handled specially in another follow up patch if there is a need (e.g. by renaming ARG_PTR_TO_SOCKET_OR_NULL to ARG_PTR_TO_BTF_ID_SOCK_COMMON_OR_NULL). ] [ When converting the older helpers that take ARG_PTR_TO_SOCK* in the later patch, if the kernel does not support BTF, ARG_PTR_TO_BTF_ID_SOCK_COMMON will behave like ARG_PTR_TO_SOCK_COMMON because no reg->type could have PTR_TO_BTF_ID in this case. It is not a concern for the newer-btf-only helper like the bpf_skc_to_*() here though because these helpers must require BTF vmlinux to begin with. ] Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200925000350.3855720-1-kafai@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fc5c901c7542..d0937f1d2980 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -292,6 +292,7 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ + ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ __BPF_ARG_TYPE_MAX, }; -- cgit v1.2.3 From 6277e374b0b07c1a93c829f0a27e38739b3b7a1b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 24 Sep 2020 13:52:24 +0200 Subject: efi: Add definition of EFI_MEMORY_CPU_CRYPTO and ability to report it Incorporate the definition of EFI_MEMORY_CPU_CRYPTO from the UEFI specification v2.8, and wire it into our memory map dumping routine as well. To make a bit of space in the output buffer, which is provided by the various callers, shorten the descriptive names of the memory types. Reviewed-by: Laszlo Ersek Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 73db1ae04cef..f216c029a77b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -122,6 +122,7 @@ typedef struct { ((u64)0x0000000000010000ULL) /* higher reliability */ #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ #define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ +#define EFI_MEMORY_CPU_CRYPTO ((u64)0x0000000000080000ULL) /* supports encryption */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_DESCRIPTOR_VERSION 1 -- cgit v1.2.3 From 403217f30418c808600e3e8e345413ba5cc15676 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 17 Aug 2020 12:53:06 -0400 Subject: SUNRPC/NFSD: Implement xdr_reserve_space_vec() Reserving space for a large READ payload requires special handling when reserving space in the xdr buffer pages. One problem we can have is use of the scratch buffer, which is used to get a pointer to a contiguous region of data up to PAGE_SIZE. When using the scratch buffer, calls to xdr_commit_encode() shift the data to it's proper alignment in the xdr buffer. If we've reserved several pages in a vector, then this could potentially invalidate earlier pointers and result in incorrect READ data being sent to the client. I get around this by looking at the amount of space left in the current page, and never reserve more than that for each entry in the read vector. This lets us place data directly where it needs to go in the buffer pages. Signed-off-by: Anna Schumaker Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5a6a81b7cd9f..6613d96a3029 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -234,6 +234,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, + size_t nbytes); extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); -- cgit v1.2.3 From afbe7973173a7ce0a68af8b33e44c967582297be Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 24 Sep 2020 12:30:20 -0400 Subject: tracepoints: Add helper to test if tracepoint is enabled in a header As tracepoints are discouraged from being added in a header because it can cause side effects if other tracepoints are in headers, as well as bloat the kernel as the trace_() function is not a small inline, the common workaround is to add a function call that calls a wrapper function in a C file that then calls the tracepoint. But as function calls add overhead, this function should only be called when the tracepoint in question is enabled. To get around this overhead, a static_branch can be used to only have the tracepoint wrapper get called when the tracepoint is enabled. Add a tracepoint_enabled(tp) macro that gets passed the name of the tracepoint, and this becomes a static_branch that is enabled when the tracepoint is enabled and is a nop when the tracepoint is disabled. Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint-defs.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index b29950a19205..60625973faaf 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -48,4 +48,38 @@ struct bpf_raw_event_map { u32 writable_size; } __aligned(32); +/* + * If a tracepoint needs to be called from a header file, it is not + * recommended to call it directly, as tracepoints in header files + * may cause side-effects and bloat the kernel. Instead, use + * tracepoint_enabled() to test if the tracepoint is enabled, then if + * it is, call a wrapper function defined in a C file that will then + * call the tracepoint. + * + * For "trace_foo_bar()", you would need to create a wrapper function + * in a C file to call trace_foo_bar(): + * void do_trace_foo_bar(args) { trace_foo_bar(args); } + * Then in the header file, declare the tracepoint: + * DECLARE_TRACEPOINT(foo_bar); + * And call your wrapper: + * static inline void some_inlined_function() { + * [..] + * if (tracepoint_enabled(foo_bar)) + * do_trace_foo_bar(args); + * [..] + * } + * + * Note: tracepoint_enabled(foo_bar) is equivalent to trace_foo_bar_enabled() + * but is safe to have in headers, where trace_foo_bar_enabled() is not. + */ +#define DECLARE_TRACEPOINT(tp) \ + extern struct tracepoint __tracepoint_##tp + +#ifdef CONFIG_TRACEPOINTS +# define tracepoint_enabled(tp) \ + static_key_false(&(__tracepoint_##tp).key) +#else +# define tracepoint_enabled(tracepoint) false +#endif + #endif -- cgit v1.2.3 From c65fc2276f0f022f5ad4a84658add2b28cff7227 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 24 Sep 2020 12:43:46 -0400 Subject: mm/page_ref: Convert the open coded tracepoint enabled to the new helper As more use cases of checking if a tracepoint is enabled in a header are coming to fruition, a helper macro, tracepoint_enabled(), has been added to check if a tracepoint is enabled or not, and can be used with minimal header requirements (avoid "include hell"). Convert the page_ref logic over to the new helper macro. Cc: Joonsoo Kim Cc: Michal Nazarewicz Cc: Vlastimil Babka Cc: Minchan Kim Cc: Mel Gorman Cc: "Kirill A. Shutemov" Cc: Sergey Senozhatsky Cc: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- include/linux/page_ref.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index d27701199a4d..f3318f34fc54 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -7,13 +7,13 @@ #include #include -extern struct tracepoint __tracepoint_page_ref_set; -extern struct tracepoint __tracepoint_page_ref_mod; -extern struct tracepoint __tracepoint_page_ref_mod_and_test; -extern struct tracepoint __tracepoint_page_ref_mod_and_return; -extern struct tracepoint __tracepoint_page_ref_mod_unless; -extern struct tracepoint __tracepoint_page_ref_freeze; -extern struct tracepoint __tracepoint_page_ref_unfreeze; +DECLARE_TRACEPOINT(page_ref_set); +DECLARE_TRACEPOINT(page_ref_mod); +DECLARE_TRACEPOINT(page_ref_mod_and_test); +DECLARE_TRACEPOINT(page_ref_mod_and_return); +DECLARE_TRACEPOINT(page_ref_mod_unless); +DECLARE_TRACEPOINT(page_ref_freeze); +DECLARE_TRACEPOINT(page_ref_unfreeze); #ifdef CONFIG_DEBUG_PAGE_REF @@ -24,7 +24,7 @@ extern struct tracepoint __tracepoint_page_ref_unfreeze; * * See trace_##name##_enabled(void) in include/linux/tracepoint.h */ -#define page_ref_tracepoint_active(t) static_key_false(&(t).key) +#define page_ref_tracepoint_active(t) tracepoint_enabled(t) extern void __page_ref_set(struct page *page, int v); extern void __page_ref_mod(struct page *page, int v); @@ -75,7 +75,7 @@ static inline int page_count(struct page *page) static inline void set_page_count(struct page *page, int v) { atomic_set(&page->_refcount, v); - if (page_ref_tracepoint_active(__tracepoint_page_ref_set)) + if (page_ref_tracepoint_active(page_ref_set)) __page_ref_set(page, v); } @@ -91,14 +91,14 @@ static inline void init_page_count(struct page *page) static inline void page_ref_add(struct page *page, int nr) { atomic_add(nr, &page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, nr); } static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, -nr); } @@ -106,7 +106,7 @@ static inline int page_ref_sub_return(struct page *page, int nr) { int ret = atomic_sub_return(nr, &page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) + if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, -nr, ret); return ret; } @@ -114,14 +114,14 @@ static inline int page_ref_sub_return(struct page *page, int nr) static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, 1); } static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, -1); } @@ -129,7 +129,7 @@ static inline int page_ref_sub_and_test(struct page *page, int nr) { int ret = atomic_sub_and_test(nr, &page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) + if (page_ref_tracepoint_active(page_ref_mod_and_test)) __page_ref_mod_and_test(page, -nr, ret); return ret; } @@ -138,7 +138,7 @@ static inline int page_ref_inc_return(struct page *page) { int ret = atomic_inc_return(&page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) + if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, 1, ret); return ret; } @@ -147,7 +147,7 @@ static inline int page_ref_dec_and_test(struct page *page) { int ret = atomic_dec_and_test(&page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) + if (page_ref_tracepoint_active(page_ref_mod_and_test)) __page_ref_mod_and_test(page, -1, ret); return ret; } @@ -156,7 +156,7 @@ static inline int page_ref_dec_return(struct page *page) { int ret = atomic_dec_return(&page->_refcount); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) + if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, -1, ret); return ret; } @@ -165,7 +165,7 @@ static inline int page_ref_add_unless(struct page *page, int nr, int u) { int ret = atomic_add_unless(&page->_refcount, nr, u); - if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless)) + if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); return ret; } @@ -174,7 +174,7 @@ static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); - if (page_ref_tracepoint_active(__tracepoint_page_ref_freeze)) + if (page_ref_tracepoint_active(page_ref_freeze)) __page_ref_freeze(page, count, ret); return ret; } @@ -185,7 +185,7 @@ static inline void page_ref_unfreeze(struct page *page, int count) VM_BUG_ON(count == 0); atomic_set_release(&page->_refcount, count); - if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) + if (page_ref_tracepoint_active(page_ref_unfreeze)) __page_ref_unfreeze(page, count); } -- cgit v1.2.3 From e0f9956a3862b32ad73869a8e52a33c84aafa46f Mon Sep 17 00:00:00 2001 From: "Chuah, Kim Tatt" Date: Fri, 25 Sep 2020 17:40:41 +0800 Subject: net: stmmac: Add option for VLAN filter fail queue enable Add option in plat_stmmacenet_data struct to enable VLAN Filter Fail Queuing. This option allows packets that fail VLAN filter to be routed to a specific Rx queue when Receive All is also set. When this option is enabled: - Enable VFFQ only when entering promiscuous mode, because Receive All will pass up all rx packets that failed address filtering (similar to promiscuous mode). - VLAN-promiscuous mode is never entered to allow rx packet to fail VLAN filters and get routed to selected VFFQ Rx queue. Reviewed-by: Voon Weifeng Reviewed-by: Ong Boon Leong Signed-off-by: Chuah, Kim Tatt Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index bd964c31d333..00e83c877496 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -198,5 +198,7 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; bool en_tx_lpi_clockgating; int has_xgmac; + bool vlan_fail_q_en; + u8 vlan_fail_q; }; #endif -- cgit v1.2.3 From ba5f4cfeac77fca981b199ec7f2396a3616e5216 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 24 Sep 2020 12:58:40 -0700 Subject: bpf: Add comment to document BTF type PTR_TO_BTF_ID_OR_NULL The meaning of PTR_TO_BTF_ID_OR_NULL differs slightly from other types denoted with the *_OR_NULL type. For example the types PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL can be used for branch analysis because the type PTR_TO_SOCKET is guaranteed to _not_ have a null value. In contrast PTR_TO_BTF_ID and BTF_TO_BTF_ID_OR_NULL have slightly different meanings. A PTR_TO_BTF_TO_ID may be a pointer to NULL value, but it is safe to read this pointer in the program context because the program context will handle any faults. The fallout is for PTR_TO_BTF_ID the verifier can assume reads are safe, but can not use the type in branch analysis. Additionally, authors need to be extra careful when passing PTR_TO_BTF_ID into helpers. In general helpers consuming type PTR_TO_BTF_ID will need to assume it may be null. Seeing the above is not obvious to readers without the back knowledge lets add a comment in the type definition. Editorial comment, as networking and tracing programs get closer and more tightly merged we may need to consider a new type that we can ensure is non-null for branch analysis and also passing into helpers. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Acked-by: Lorenz Bauer --- include/linux/bpf.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d0937f1d2980..79902325bef8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -383,8 +383,22 @@ enum bpf_reg_type { PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ - PTR_TO_BTF_ID, /* reg points to kernel struct */ - PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */ + /* PTR_TO_BTF_ID points to a kernel struct that does not need + * to be null checked by the BPF program. This does not imply the + * pointer is _not_ null and in practice this can easily be a null + * pointer when reading pointer chains. The assumption is program + * context will handle null pointer dereference typically via fault + * handling. The verifier must keep this in mind and can make no + * assumptions about null or non-null when doing branch analysis. + * Further, when passed into helpers the helpers can not, without + * additional context, assume the value is non-null. + */ + PTR_TO_BTF_ID, + /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not + * been checked for null. Used primarily to inform the verifier + * an explicit null check is required for this struct. + */ + PTR_TO_BTF_ID_OR_NULL, PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ -- cgit v1.2.3 From da9aa5d96bfe49e903ce2bc01cfb8a776c2619e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 Sep 2020 09:03:57 +0200 Subject: fs: remove vfs_statx_fd vfs_statx_fd is only used to implement vfs_fstat. Remove vfs_statx_fd and just implement vfs_fstat directly. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e019ea2f1347..f100d9f711a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3163,7 +3163,7 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); -extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); +int vfs_fstat(int fd, struct kstat *stat); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { @@ -3181,11 +3181,6 @@ static inline int vfs_fstatat(int dfd, const char __user *filename, return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } -static inline int vfs_fstat(int fd, struct kstat *stat) -{ - return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); -} - extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); -- cgit v1.2.3 From 0b2c6693b4220595e9cff95d829d5d5bc5d544dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 Sep 2020 09:03:58 +0200 Subject: fs: implement vfs_stat and vfs_lstat in terms of vfs_fstatat Go through vfs_fstatat instead of duplicating the *stat to statx mapping three times. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f100d9f711a3..b43c9ad7c3c2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3165,21 +3165,19 @@ extern int iterate_dir(struct file *, struct dir_context *); extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); int vfs_fstat(int fd, struct kstat *stat); -static inline int vfs_stat(const char __user *filename, struct kstat *stat) +static inline int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) { - return vfs_statx(AT_FDCWD, filename, AT_NO_AUTOMOUNT, + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } -static inline int vfs_lstat(const char __user *name, struct kstat *stat) +static inline int vfs_stat(const char __user *filename, struct kstat *stat) { - return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); + return vfs_fstatat(AT_FDCWD, filename, stat, 0); } -static inline int vfs_fstatat(int dfd, const char __user *filename, - struct kstat *stat, int flags) +static inline int vfs_lstat(const char __user *name, struct kstat *stat) { - return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); + return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); } extern const char *vfs_get_link(struct dentry *, struct delayed_call *); -- cgit v1.2.3 From 09f1bde4017e9c34749da2918b3926799c77bce8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 Sep 2020 09:03:59 +0200 Subject: fs: move vfs_fstatat out of line This allows to keep vfs_statx static in fs/stat.c to prepare for the following changes. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b43c9ad7c3c2..1ac68236bc09 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3162,15 +3162,10 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); +int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, + int flags); int vfs_fstat(int fd, struct kstat *stat); -static inline int vfs_fstatat(int dfd, const char __user *filename, - struct kstat *stat, int flags) -{ - return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); -} static inline int vfs_stat(const char __user *filename, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, filename, stat, 0); -- cgit v1.2.3 From f2d077ff1b5c17008cff5dc27e7356a694e55462 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 Sep 2020 09:04:01 +0200 Subject: fs: remove KSTAT_QUERY_FLAGS KSTAT_QUERY_FLAGS expands to AT_STATX_SYNC_TYPE, which itself already is a mask. Remove the double name, especially given that the prefix is a little confusing vs the normal AT_* flags. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/stat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stat.h b/include/linux/stat.h index 56614af83d4a..fff27e603814 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -19,8 +19,6 @@ #include #include -#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE) - struct kstat { u32 result_mask; /* What fields the user got */ umode_t mode; -- cgit v1.2.3 From e622129569963fa3bab976685443756e5da70da9 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Sun, 27 Sep 2020 16:01:50 +0800 Subject: ptp: add stub function for ptp_get_msgtype() Added the missing stub function for ptp_get_msgtype(). Fixes: 036c508ba95e ("ptp: Add generic ptp message type function") Reported-by: Randy Dunlap Signed-off-by: Yangbo Lu Acked-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 8437307cca8c..c6487b7ab026 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -134,5 +134,13 @@ static inline struct ptp_header *ptp_parse_header(struct sk_buff *skb, { return NULL; } +static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, + unsigned int type) +{ + /* The return is meaningless. The stub function would not be + * executed since no available header from ptp_parse_header. + */ + return 0; +} #endif #endif /* _PTP_CLASSIFY_H_ */ -- cgit v1.2.3 From c1135c7fd0e95740cd4619ce389f43ffce043140 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 16 Sep 2020 11:48:09 +0800 Subject: Drivers: hv: vmbus: Introduce types of GPADL This patch introduces two types of GPADL: HV_GPADL_{BUFFER, RING}. The types of GPADL are purely the concept in the guest, IOW the hypervisor treat them as the same. The reason of introducing the types for GPADL is to support guests whose page size is not 4k (the page size of Hyper-V hypervisor). In these guests, both the headers and the data parts of the ringbuffers need to be aligned to the PAGE_SIZE, because 1) some of the ringbuffers will be mapped into userspace and 2) we use "double mapping" mechanism to support fast wrap-around, and "double mapping" relies on ringbuffers being page-aligned. However, the Hyper-V hypervisor only uses 4k (HV_HYP_PAGE_SIZE) headers. Our solution to this is that we always make the headers of ringbuffers take one guest page and when GPADL is established between the guest and hypervisor, the only first 4k of header is used. To handle this special case, we need the types of GPADL to differ different guest memory usage for GPADL. Type enum is introduced along with several general interfaces to describe the differences between normal buffer GPADL and ringbuffer GPADL. Signed-off-by: Boqun Feng Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20200916034817.30282-4-boqun.feng@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 38100e80360a..9c19149c0e1a 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -29,6 +29,48 @@ #pragma pack(push, 1) +/* + * Types for GPADL, decides is how GPADL header is created. + * + * It doesn't make much difference between BUFFER and RING if PAGE_SIZE is the + * same as HV_HYP_PAGE_SIZE. + * + * If PAGE_SIZE is bigger than HV_HYP_PAGE_SIZE, the headers of ring buffers + * will be of PAGE_SIZE, however, only the first HV_HYP_PAGE will be put + * into gpadl, therefore the number for HV_HYP_PAGE and the indexes of each + * HV_HYP_PAGE will be different between different types of GPADL, for example + * if PAGE_SIZE is 64K: + * + * BUFFER: + * + * gva: |-- 64k --|-- 64k --| ... | + * gpa: | 4k | 4k | ... | 4k | 4k | 4k | ... | 4k | + * index: 0 1 2 15 16 17 18 .. 31 32 ... + * | | ... | | | ... | ... + * v V V V V V + * gpadl: | 4k | 4k | ... | 4k | 4k | 4k | ... | 4k | ... | + * index: 0 1 2 ... 15 16 17 18 .. 31 32 ... + * + * RING: + * + * | header | data | header | data | + * gva: |-- 64k --|-- 64k --| ... |-- 64k --|-- 64k --| ... | + * gpa: | 4k | .. | 4k | 4k | ... | 4k | ... | 4k | .. | 4k | .. | ... | + * index: 0 1 16 17 18 31 ... n n+1 n+16 ... 2n + * | / / / | / / + * | / / / | / / + * | / / ... / ... | / ... / + * | / / / | / / + * | / / / | / / + * V V V V V V v + * gpadl: | 4k | 4k | ... | ... | 4k | 4k | ... | + * index: 0 1 2 ... 16 ... n-15 n-14 n-13 ... 2n-30 + */ +enum hv_gpadl_type { + HV_GPADL_BUFFER, + HV_GPADL_RING +}; + /* Single-page buffer */ struct hv_page_buffer { u32 len; @@ -111,7 +153,7 @@ struct hv_ring_buffer { } feature_bits; /* Pad it to PAGE_SIZE so that data starts on page boundary */ - u8 reserved2[4028]; + u8 reserved2[PAGE_SIZE - 68]; /* * Ring data starts here + RingDataStartOffset @@ -120,6 +162,10 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; +/* Calculate the proper size of a ringbuffer, it must be page-aligned */ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ + (payload_sz)) + struct hv_ring_buffer_info { struct hv_ring_buffer *ring_buffer; u32 ring_size; /* Include the shared header */ -- cgit v1.2.3 From bca6b91dfdd4347d8fe0bb97377dda0cacd51d82 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 16 Sep 2020 11:48:11 +0800 Subject: Drivers: hv: vmbus: Move virt_to_hvpfn() to hyperv header There will be more places other than vmbus where we need to calculate the Hyper-V page PFN from a virtual address, so move virt_to_hvpfn() to hyperv generic header. Signed-off-by: Boqun Feng Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20200916034817.30282-6-boqun.feng@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 9c19149c0e1a..83456dc181a8 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -14,6 +14,7 @@ #include +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include #define MAX_PAGE_BUFFER_COUNT 32 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */ @@ -1676,4 +1678,17 @@ struct hyperv_pci_block_ops { extern struct hyperv_pci_block_ops hvpci_block_ops; +static inline unsigned long virt_to_hvpfn(void *addr) +{ + phys_addr_t paddr; + + if (is_vmalloc_addr(addr)) + paddr = page_to_phys(vmalloc_to_page(addr)) + + offset_in_page(addr); + else + paddr = __pa(addr); + + return paddr >> HV_HYP_PAGE_SHIFT; +} + #endif /* _HYPERV_H */ -- cgit v1.2.3 From 106dee08408118c5b50387817e74419436e8b651 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 16 Sep 2020 11:48:12 +0800 Subject: hv: hyperv.h: Introduce some hvpfn helper functions When a guest communicate with the hypervisor, it must use HV_HYP_PAGE to calculate PFN, so introduce a few hvpfn helper functions as the counterpart of the page helper functions. This is the preparation for supporting guest whose PAGE_SIZE is not 4k. Signed-off-by: Boqun Feng Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20200916034817.30282-7-boqun.feng@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 83456dc181a8..1ce131f29f3b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1691,4 +1691,9 @@ static inline unsigned long virt_to_hvpfn(void *addr) return paddr >> HV_HYP_PAGE_SHIFT; } +#define NR_HV_HYP_PAGES_IN_PAGE (PAGE_SIZE / HV_HYP_PAGE_SIZE) +#define offset_in_hvpage(ptr) ((unsigned long)(ptr) & ~HV_HYP_PAGE_MASK) +#define HVPFN_UP(x) (((x) + HV_HYP_PAGE_SIZE-1) >> HV_HYP_PAGE_SHIFT) +#define page_to_hvpfn(page) (page_to_pfn(page) * NR_HV_HYP_PAGES_IN_PAGE) + #endif /* _HYPERV_H */ -- cgit v1.2.3 From f2d10ff4a903813df767a4b56b651a26b938df06 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Sun, 27 Sep 2020 22:15:29 +0100 Subject: kgdb: Honour the kprobe blocklist when setting breakpoints Currently kgdb has absolutely no safety rails in place to discourage or prevent a user from placing a breakpoint in dangerous places such as the debugger's own trap entry/exit and other places where it is not safe to take synchronous traps. Introduce a new config symbol KGDB_HONOUR_BLOCKLIST and modify the default implementation of kgdb_validate_break_address() so that we use the kprobe blocklist to prohibit instrumentation of critical functions if the config symbol is set. The config symbol dependencies are set to ensure that the blocklist will be enabled by default if we enable KGDB and are compiling for an architecture where we HAVE_KPROBES. Suggested-by: Peter Zijlstra Reviewed-by: Douglas Anderson Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20200927211531.1380577-2-daniel.thompson@linaro.org Signed-off-by: Daniel Thompson --- include/linux/kgdb.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 477b8b7c908f..0d6cf64c8bb1 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_HAVE_ARCH_KGDB #include #endif @@ -335,6 +336,23 @@ extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, int err_code, atomic_t *snd_rdy); extern void gdbstub_exit(int status); +/* + * kgdb and kprobes both use the same (kprobe) blocklist (which makes sense + * given they are both typically hooked up to the same trap meaning on most + * architectures one cannot be used to debug the other) + * + * However on architectures where kprobes is not (yet) implemented we permit + * breakpoints everywhere rather than blocking everything by default. + */ +static inline bool kgdb_within_blocklist(unsigned long addr) +{ +#ifdef CONFIG_KGDB_HONOUR_BLOCKLIST + return within_kprobe_blacklist(addr); +#else + return false; +#endif +} + extern int kgdb_single_step; extern atomic_t kgdb_active; #define in_dbg_master() \ -- cgit v1.2.3 From d2b7588a47de8322891de38ec14d15105d66cb1e Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 21 Sep 2020 19:28:04 -0700 Subject: nl80211: support S1G capability overrides in assoc NL80211_ATTR_S1G_CAPABILITY can be passed along with NL80211_ATTR_S1G_CAPABILITY_MASK to NL80211_CMD_ASSOCIATE to indicate S1G capabilities which should override the hardware capabilities in eg. the association request. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200922022818.15855-4-thomas@adapt-ip.com [johannes: always require both attributes together, commit message] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 53fba39d4ba6..f71cffa18176 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2330,6 +2330,8 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) } /* S1G Capabilities Information field */ +#define IEEE80211_S1G_CAPABILITY_LEN 15 + #define S1G_CAP0_S1G_LONG BIT(0) #define S1G_CAP0_SGI_1MHZ BIT(1) #define S1G_CAP0_SGI_2MHZ BIT(2) -- cgit v1.2.3 From 9eaffe5078ca0808603cdd15c4eaf0106a996f3a Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 21 Sep 2020 19:28:06 -0700 Subject: cfg80211: convert S1G beacon to scan results The S1G beacon is an extension frame as opposed to management frame for the regular beacon. This means we may have to occasionally cast the frame buffer to a different header type. Luckily this isn't too bad as scan results mostly only care about the IEs. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200922022818.15855-6-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f71cffa18176..1ce0b37441b9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -151,6 +151,9 @@ #define IEEE80211_ANO_NETTYPE_WILD 15 +/* bits unique to S1G beacon */ +#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 + /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 @@ -553,6 +556,28 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON); } +/** + * ieee80211_next_tbtt_present - check if IEEE80211_FTYPE_EXT && + * IEEE80211_STYPE_S1G_BEACON && IEEE80211_S1G_BCN_NEXT_TBTT + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_next_tbtt_present(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == + cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON) && + fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT); +} + +/** + * ieee80211_is_s1g_short_beacon - check if next tbtt present bit is set. Only + * true for S1G beacons when they're short. + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && ieee80211_next_tbtt_present(fc); +} + /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder @@ -1034,6 +1059,13 @@ struct ieee80211_ext { u8 change_seq; u8 variable[0]; } __packed s1g_beacon; + struct { + u8 sa[ETH_ALEN]; + __le32 timestamp; + u8 change_seq; + u8 next_tbtt[3]; + u8 variable[0]; + } __packed s1g_short_beacon; } u; } __packed __aligned(2); -- cgit v1.2.3 From 80ca25711380c8eabe51eed875ca9432b4f8939e Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 21 Sep 2020 19:28:09 -0700 Subject: cfg80211: handle Association Response from S1G STA The sending STA type is implicit based on beacon or probe response content. If sending STA was an S1G STA, adjust the Information Element location accordingly. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200922022818.15855-9-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1ce0b37441b9..d0fda8424118 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1100,6 +1100,11 @@ struct ieee80211_mgmt { /* followed by Supported rates */ u8 variable[0]; } __packed assoc_resp, reassoc_resp; + struct { + __le16 capab_info; + __le16 status_code; + u8 variable[0]; + } __packed s1g_assoc_resp, s1g_reassoc_resp; struct { __le16 capab_info; __le16 listen_interval; -- cgit v1.2.3 From 05d109576a36fd498e5db2d905eb50c7dd844b83 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 21 Sep 2020 19:28:10 -0700 Subject: mac80211: encode listen interval for S1G S1G allows listen interval up to 2^14 * 10000 beacon intervals. In order to do this listen interval needs a scaling factor applied to the lower 14 bits. Calculate this and properly encode the listen interval for S1G STAs. See IEEE802.11ah-2016 Table 9-44a for reference. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200922022818.15855-10-thomas@adapt-ip.com [move listen_int_usf into function using it] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d0fda8424118..7b6af47dd279 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2448,6 +2448,13 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) + +#define LISTEN_INT_USF GENMASK(15, 14) +#define LISTEN_INT_UI GENMASK(13, 0) + +#define IEEE80211_MAX_USF FIELD_MAX(LISTEN_INT_USF) +#define IEEE80211_MAX_UI FIELD_MAX(LISTEN_INT_UI) + /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 -- cgit v1.2.3 From 1d00ce807efaa0ee3a96de7801be042a06d35873 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 21 Sep 2020 19:28:15 -0700 Subject: mac80211: support S1G association The changes required for associating in S1G are: - apply S1G BSS channel info before assoc - mark all S1G STAs as QoS STAs - include and parse AID request element - handle new Association Response format - don't fail assoc if supported rates element is missing Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200922022818.15855-15-thomas@adapt-ip.com [pass skb to ieee80211_add_aid_request_ie(), remove unused variable 'bss'] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7b6af47dd279..f2f56b287aed 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -987,6 +987,25 @@ enum ieee80211_vht_opmode_bits { IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF = 0x80, }; +/** + * enum ieee80211_s1g_chanwidth + * These are defined in IEEE802.11-2016ah Table 10-20 + * as BSS Channel Width + * + * @IEEE80211_S1G_CHANWIDTH_1MHZ: 1MHz operating channel + * @IEEE80211_S1G_CHANWIDTH_2MHZ: 2MHz operating channel + * @IEEE80211_S1G_CHANWIDTH_4MHZ: 4MHz operating channel + * @IEEE80211_S1G_CHANWIDTH_8MHZ: 8MHz operating channel + * @IEEE80211_S1G_CHANWIDTH_16MHZ: 16MHz operating channel + */ +enum ieee80211_s1g_chanwidth { + IEEE80211_S1G_CHANWIDTH_1MHZ = 0, + IEEE80211_S1G_CHANWIDTH_2MHZ = 1, + IEEE80211_S1G_CHANWIDTH_4MHZ = 3, + IEEE80211_S1G_CHANWIDTH_8MHZ = 7, + IEEE80211_S1G_CHANWIDTH_16MHZ = 15, +}; + #define WLAN_SA_QUERY_TR_ID_LEN 2 #define WLAN_MEMBERSHIP_LEN 8 #define WLAN_USER_POSITION_LEN 16 @@ -2854,6 +2873,8 @@ enum ieee80211_eid { WLAN_EID_REDUCED_NEIGHBOR_REPORT = 201, + WLAN_EID_AID_REQUEST = 210, + WLAN_EID_AID_RESPONSE = 211, WLAN_EID_S1G_BCN_COMPAT = 213, WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214, WLAN_EID_S1G_CAPABILITIES = 217, -- cgit v1.2.3 From f5bec330e3010450daeb5cb6a94a4a7c54afa306 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Mon, 28 Sep 2020 00:28:11 -0700 Subject: nl80211: extend support to config spatial reuse parameter set Allow the user to configure below Spatial Reuse Parameter Set element. * Non-SRG OBSS PD Max Offset * SRG BSS Color Bitmap * SRG Partial BSSID Bitmap Signed-off-by: Rajkumar Manoharan Link: https://lore.kernel.org/r/1601278091-20313-2-git-send-email-rmanohar@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f2f56b287aed..770408b2fdaf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2350,8 +2350,11 @@ ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) } /* HE Spatial Reuse defines */ -#define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT 0x4 -#define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT 0x8 +#define IEEE80211_HE_SPR_PSR_DISALLOWED BIT(0) +#define IEEE80211_HE_SPR_NON_SRG_OBSS_PD_SR_DISALLOWED BIT(1) +#define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT BIT(2) +#define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT BIT(3) +#define IEEE80211_HE_SPR_HESIGA_SR_VAL15_ALLOWED BIT(4) /* * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size -- cgit v1.2.3 From f5be3a61fdb5dd11ef60173e2783ccf62685f892 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 22 Sep 2020 13:53:45 +0800 Subject: arm64: perf: Add support caps under sysfs ARMv8.4-PMU introduces the PMMIR_EL1 registers and some new PMU events, like STALL_SLOT etc, are related to it. Let's add a caps directory to /sys/bus/event_source/devices/armv8_pmuv3_0/ and support slots from PMMIR_EL1 registers in this entry. The user programs can get the slots from sysfs directly. /sys/bus/event_source/devices/armv8_pmuv3_0/caps/slots is exposed under sysfs. Both ARMv8.4-PMU and STALL_SLOT event are implemented, it returns the slots from PMMIR_EL1, otherwise it will return 0. Signed-off-by: Shaokun Zhang Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1600754025-53535-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 5b616dde9a4c..505480217cf1 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -73,6 +73,7 @@ enum armpmu_attr_groups { ARMPMU_ATTR_GROUP_COMMON, ARMPMU_ATTR_GROUP_EVENTS, ARMPMU_ATTR_GROUP_FORMATS, + ARMPMU_ATTR_GROUP_CAPS, ARMPMU_NR_ATTR_GROUPS }; @@ -109,6 +110,8 @@ struct arm_pmu { struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; + /* store the PMMIR_EL1 to expose slots */ + u64 reg_pmmir; /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; -- cgit v1.2.3 From ef24f97daac4d9450c956ab165d8337c2feca0e9 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:57 +0200 Subject: mtd: rawnand: Separate the ECC engine type and the ECC byte placement The use of "syndrome" placement should not be encoded in the ECC engine mode/type. Create a "placement" field in NAND chip and change all occurrences of the NAND_ECC_HW_SYNDROME enumeration to be just NAND_ECC_HW and possibly a placement entry like NAND_ECC_PLACEMENT_INTERLEAVED. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-10-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 6 ++++-- include/linux/platform_data/mtd-davinci.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 10bbfbf4ad7f..cfd75a12f802 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -304,6 +304,7 @@ static const struct nand_ecc_caps __name = { \ /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode + * @placement: OOB bytes placement * @algo: ECC algorithm * @steps: number of ECC steps per page * @size: data bytes per ECC step @@ -331,7 +332,7 @@ static const struct nand_ecc_caps __name = { \ * controller and always return contiguous in-band and * out-of-band data even if they're not stored * contiguously on the NAND chip (e.g. - * NAND_ECC_HW_SYNDROME interleaves in-band and + * NAND_ECC_PLACEMENT_INTERLEAVED interleaves in-band and * out-of-band data). * @write_page_raw: function to write a raw page without ECC. This function * should hide the specific layout used by the ECC @@ -339,7 +340,7 @@ static const struct nand_ecc_caps __name = { \ * in-band and out-of-band data. ECC controller is * responsible for doing the appropriate transformations * to adapt to its specific layout (e.g. - * NAND_ECC_HW_SYNDROME interleaves in-band and + * NAND_ECC_PLACEMENT_INTERLEAVED interleaves in-band and * out-of-band data). * @read_page: function to read a page according to the ECC generator * requirements; returns maximum number of bitflips corrected in @@ -356,6 +357,7 @@ static const struct nand_ecc_caps __name = { \ */ struct nand_ecc_ctrl { enum nand_ecc_mode mode; + enum nand_ecc_placement placement; enum nand_ecc_algo algo; int steps; int size; diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h index 03e92c71b3fa..6e2b252a4ce6 100644 --- a/include/linux/platform_data/mtd-davinci.h +++ b/include/linux/platform_data/mtd-davinci.h @@ -69,6 +69,7 @@ struct davinci_nand_pdata { /* platform_data */ * using it with large page chips. */ enum nand_ecc_mode ecc_mode; + enum nand_ecc_placement ecc_placement; u8 ecc_bits; /* e.g. NAND_BUSWIDTH_16 */ -- cgit v1.2.3 From bace41f80f65dc4ba13c892bac783e7e81847379 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:58 +0200 Subject: mtd: rawnand: Use the new ECC engine type enumeration Mechanical switch from the legacy "mode" enumeration to the new "engine type" enumeration in drivers and board files. The device tree parsing is also updated to return the new enumeration from the old strings. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-11-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 4 ++-- include/linux/platform_data/mtd-davinci.h | 8 ++++---- include/linux/platform_data/mtd-nand-s3c2410.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index cfd75a12f802..967b616c50df 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -303,7 +303,7 @@ static const struct nand_ecc_caps __name = { \ /** * struct nand_ecc_ctrl - Control structure for ECC - * @mode: ECC mode + * @engine_type: ECC engine type * @placement: OOB bytes placement * @algo: ECC algorithm * @steps: number of ECC steps per page @@ -356,7 +356,7 @@ static const struct nand_ecc_caps __name = { \ * @write_oob: function to write chip OOB data */ struct nand_ecc_ctrl { - enum nand_ecc_mode mode; + enum nand_ecc_engine_type engine_type; enum nand_ecc_placement placement; enum nand_ecc_algo algo; int steps; diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h index 6e2b252a4ce6..dd474dd44848 100644 --- a/include/linux/platform_data/mtd-davinci.h +++ b/include/linux/platform_data/mtd-davinci.h @@ -60,15 +60,15 @@ struct davinci_nand_pdata { /* platform_data */ struct mtd_partition *parts; unsigned nr_parts; - /* none == NAND_ECC_NONE (strongly *not* advised!!) - * soft == NAND_ECC_SOFT - * else == NAND_ECC_HW, according to ecc_bits + /* none == NAND_ECC_ENGINE_TYPE_NONE (strongly *not* advised!!) + * soft == NAND_ECC_ENGINE_TYPE_SOFT + * else == NAND_ECC_ENGINE_TYPE_ON_HOST, according to ecc_bits * * All DaVinci-family chips support 1-bit hardware ECC. * Newer ones also support 4-bit ECC, but are awkward * using it with large page chips. */ - enum nand_ecc_mode ecc_mode; + enum nand_ecc_engine_type engine_type; enum nand_ecc_placement ecc_placement; u8 ecc_bits; diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index 08675b16f9e1..25390fc3e795 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -49,7 +49,7 @@ struct s3c2410_platform_nand { unsigned int ignore_unset_ecc:1; - enum nand_ecc_mode ecc_mode; + enum nand_ecc_engine_type engine_type; int nr_sets; struct s3c2410_nand_set *sets; -- cgit v1.2.3 From d193792a26c216cb7db3cf12300c9414990fa603 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:51:59 +0200 Subject: mtd: nand: Create a helper to extract the ECC configuration Despite its current name, the eccreq field actually encodes both the NAND requirements and the final ECC configuration. That works fine when using on-die ECC since those 2 concepts match perfectly, but it starts being a problem as soon as we use on-host ECC engines, where we're not guaranteed to have a perfect match. Let's hide the ECC configuration access behind a helper so we can later split those 2 concepts. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-12-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8cf5bdbea782..9cbb41a5541c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -523,6 +523,16 @@ nanddev_get_memorg(struct nand_device *nand) return &nand->memorg; } +/** + * nanddev_get_ecc_conf() - Extract the ECC configuration from a NAND device + * @nand: NAND device + */ +static inline const struct nand_ecc_props * +nanddev_get_ecc_conf(struct nand_device *nand) +{ + return &nand->eccreq; +} + int nanddev_init(struct nand_device *nand, const struct nand_ops *ops, struct module *owner); void nanddev_cleanup(struct nand_device *nand); -- cgit v1.2.3 From 3316c8e3ad1fcaeefd4ffa93587dd78fb24e8afa Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:52:01 +0200 Subject: mtd: nand: Create helpers to set/extract the ECC requirements Despite its current name, the eccreq field actually encodes both the NAND requirements and the final ECC configuration. That works fine when using on-die ECC since those 2 concepts match perfectly, but it starts being a problem as soon as we use on-host ECC engines, where we're not guaranteed to have a perfect match. Let's hide the ECC requirements access behind helpers so we can later split those 2 concepts. As the structures have not been clarified yet, these helpers access the same internal variable as nanddev_get_ecc_conf() for now. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-14-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9cbb41a5541c..348fb2ad4d90 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -533,6 +533,30 @@ nanddev_get_ecc_conf(struct nand_device *nand) return &nand->eccreq; } +/** + * nanddev_get_ecc_requirements() - Extract the ECC requirements from a NAND + * device + * @nand: NAND device + */ +static inline const struct nand_ecc_props * +nanddev_get_ecc_requirements(struct nand_device *nand) +{ + return &nand->eccreq; +} + +/** + * nanddev_set_ecc_requirements() - Assign the ECC requirements of a NAND + * device + * @nand: NAND device + * @reqs: Requirements + */ +static inline void +nanddev_set_ecc_requirements(struct nand_device *nand, + const struct nand_ecc_props *reqs) +{ + nand->eccreq = *reqs; +} + int nanddev_init(struct nand_device *nand, const struct nand_ops *ops, struct module *owner); void nanddev_cleanup(struct nand_device *nand); -- cgit v1.2.3 From 93ef92f6f42275e3d6070b1c5020bfca0e614fff Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:52:03 +0200 Subject: mtd: nand: Use the new generic ECC object Embed a generic NAND ECC high-level object in the nand_device structure to carry all the ECC engine configuration/data. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-16-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 348fb2ad4d90..697ea2474a7c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -301,7 +301,7 @@ struct nand_ecc { * struct nand_device - NAND device * @mtd: MTD instance attached to the NAND device * @memorg: memory layout - * @eccreq: ECC requirements + * @ecc: NAND ECC object attached to the NAND device * @rowconv: position to row address converter * @bbt: bad block table info * @ops: NAND operations attached to the NAND device @@ -309,8 +309,8 @@ struct nand_ecc { * Generic NAND object. Specialized NAND layers (raw NAND, SPI NAND, OneNAND) * should declare their own NAND object embedding a nand_device struct (that's * how inheritance is done). - * struct_nand_device->memorg and struct_nand_device->eccreq should be filled - * at device detection time to reflect the NAND device + * struct_nand_device->memorg and struct_nand_device->ecc.requirements should + * be filled at device detection time to reflect the NAND device * capabilities/requirements. Once this is done nanddev_init() can be called. * It will take care of converting NAND information into MTD ones, which means * the specialized NAND layers should never manually tweak @@ -319,7 +319,7 @@ struct nand_ecc { struct nand_device { struct mtd_info mtd; struct nand_memory_organization memorg; - struct nand_ecc_props eccreq; + struct nand_ecc ecc; struct nand_row_converter rowconv; struct nand_bbt bbt; const struct nand_ops *ops; @@ -530,7 +530,7 @@ nanddev_get_memorg(struct nand_device *nand) static inline const struct nand_ecc_props * nanddev_get_ecc_conf(struct nand_device *nand) { - return &nand->eccreq; + return &nand->ecc.ctx.conf; } /** @@ -541,7 +541,7 @@ nanddev_get_ecc_conf(struct nand_device *nand) static inline const struct nand_ecc_props * nanddev_get_ecc_requirements(struct nand_device *nand) { - return &nand->eccreq; + return &nand->ecc.requirements; } /** @@ -554,7 +554,7 @@ static inline void nanddev_set_ecc_requirements(struct nand_device *nand, const struct nand_ecc_props *reqs) { - nand->eccreq = *reqs; + nand->ecc.requirements = *reqs; } int nanddev_init(struct nand_device *nand, const struct nand_ops *ops, -- cgit v1.2.3 From 6b61d49a55796dbbc479eeb4465e59fd656c719c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 18 Sep 2020 19:55:18 +0300 Subject: PM: runtime: Fix timer_expires data type on 32-bit arches Commit 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") switched PM runtime autosuspend to use hrtimers and all related time accounting in ns, but missed to update the timer_expires data type in struct dev_pm_info to u64. This causes the timer_expires value to be truncated on 32-bit architectures when assignment is done from u64 values: rpm_suspend() |- dev->power.timer_expires = expires; Fix it by changing the timer_expires type to u64. Fixes: 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") Signed-off-by: Grygorii Strashko Acked-by: Pavel Machek Acked-by: Vincent Guittot Cc: 5.0+ # 5.0+ [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index a30a4b54df52..47aca6bac1d6 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -590,7 +590,7 @@ struct dev_pm_info { #endif #ifdef CONFIG_PM struct hrtimer suspend_timer; - unsigned long timer_expires; + u64 timer_expires; struct work_struct work; wait_queue_head_t wait_queue; struct wake_irq *wakeirq; -- cgit v1.2.3 From c3d4ed1abecfcfc801199cfadb71f5b80e025d9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 08:51:08 -0700 Subject: iomap: Allow filesystem to call iomap_dio_complete without i_rwsem This is to avoid the deadlock caused in btrfs because of O_DIRECT | O_DSYNC. Filesystems such as btrfs require i_rwsem while performing sync on a file. iomap_dio_rw() is called under i_rw_sem. This leads to a deadlock because of: iomap_dio_complete() generic_write_sync() btrfs_sync_file() Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems can call iomap_dio_complete() after unlocking i_rwsem. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Signed-off-by: Goldwyn Rodrigues Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4d1d3c3469e9..172b3397a1a3 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -13,6 +13,7 @@ struct address_space; struct fiemap_extent_info; struct inode; +struct iomap_dio; struct iomap_writepage_ctx; struct iov_iter; struct kiocb; @@ -258,6 +259,10 @@ struct iomap_dio_ops { ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, bool wait_for_completion); +struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + bool wait_for_completion); +ssize_t iomap_dio_complete(struct iomap_dio *dio); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP -- cgit v1.2.3 From b8127113d01e53adba15b41aefd37b90ed83d631 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 28 Sep 2020 10:34:51 -0600 Subject: coresight: Use IS_ENABLED for CONFIGs that may be modules Checking for ifdef CONFIG_x fails if CONFIG_x=m. Use IS_ENABLED that is true for both built-ins and modules, instead. Required when building coresight components as modules. Cc: Mathieu Poirier Cc: Leo Yan Cc: Alexander Shishkin Cc: Randy Dunlap Cc: Suzuki K Poulose Cc: Greg Kroah-Hartman Cc: Russell King Reviewed-by: Suzuki K Poulose Signed-off-by: Kim Phillips Signed-off-by: Tingwei Zhang Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200928163513.70169-4-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 58fffdecdbfd..3bb738f9a326 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -324,7 +324,7 @@ struct coresight_ops { const struct coresight_ops_ect *ect_ops; }; -#ifdef CONFIG_CORESIGHT +#if IS_ENABLED(CONFIG_CORESIGHT) extern struct coresight_device * coresight_register(struct coresight_desc *desc); extern void coresight_unregister(struct coresight_device *csdev); -- cgit v1.2.3 From f2ccc7b7bfa91b5cad5a4d57b269f6d7bbafd67d Mon Sep 17 00:00:00 2001 From: Tingwei Zhang Date: Mon, 28 Sep 2020 10:35:07 -0600 Subject: coresight: cti: Don't disable ect device if it's not enabled If associated ect device is not enabled at first place, disable routine should not be called. Add ect_enabled flag to check whether ect device is enabled. Fix the issue in below case. Ect device is not available when associated coresight device enabled and the association is established after coresight device is enabled. Signed-off-by: Mike Leach Signed-off-by: Tingwei Zhang Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200928163513.70169-20-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 3bb738f9a326..7d3c87e5b97c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -208,6 +208,7 @@ struct coresight_device { /* sysfs links between components */ int nr_links; bool has_conns_grp; + bool ect_enabled; /* true only if associated ect device is enabled */ }; /* -- cgit v1.2.3 From ea470b82f205fc1b0b5276575da519bb7d86db25 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 25 Sep 2020 17:48:55 +0100 Subject: regmap: add support to regmap_field_bulk_alloc/free apis Usage of regmap_field_alloc becomes much overhead when number of fields exceed more than 3. QCOM LPASS driver has extensively converted to use regmap_fields. Using new bulk api to allocate fields makes it much more cleaner code to read! Signed-off-by: Srinivas Kandagatla Tested-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/20200925164856.10315-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- include/linux/regmap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 1970ed59d49f..eafe833f0103 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1150,6 +1150,17 @@ struct regmap_field *devm_regmap_field_alloc(struct device *dev, struct regmap *regmap, struct reg_field reg_field); void devm_regmap_field_free(struct device *dev, struct regmap_field *field); +int regmap_field_bulk_alloc(struct regmap *regmap, + struct regmap_field **rm_field, + struct reg_field *reg_field, + int num_fields); +void regmap_field_bulk_free(struct regmap_field *field); +int devm_regmap_field_bulk_alloc(struct device *dev, struct regmap *regmap, + struct regmap_field **field, + struct reg_field *reg_field, int num_fields); +void devm_regmap_field_bulk_free(struct device *dev, + struct regmap_field *field); + int regmap_field_read(struct regmap_field *field, unsigned int *val); int regmap_field_update_bits_base(struct regmap_field *field, unsigned int mask, unsigned int val, -- cgit v1.2.3 From 1b4d60ec162f82ea29a2e7a907b5c6cc9f926321 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 25 Sep 2020 13:54:29 -0700 Subject: bpf: Enable BPF_PROG_TEST_RUN for raw_tracepoint Add .test_run for raw_tracepoint. Also, introduce a new feature that runs the target program on a specific CPU. This is achieved by a new flag in bpf_attr.test, BPF_F_TEST_RUN_ON_CPU. When this flag is set, the program is triggered on cpu with id bpf_attr.test.cpu. This feature is needed for BPF programs that handle perf_event and other percpu resources, as the program can access these resource locally. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200925205432.1777-2-songliubraving@fb.com --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 79902325bef8..db6dcdee7933 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1396,6 +1396,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); -- cgit v1.2.3 From 5b2a828b98ec1872799b1b4d82113c76a12d594f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 Sep 2020 22:47:21 -0700 Subject: fscrypt: export fscrypt_d_revalidate() Dentries that represent no-key names must have a dentry_operations that includes fscrypt_d_revalidate(). Currently, this is handled by fscrypt_prepare_lookup() installing fscrypt_d_ops. However, ceph support for encryption (https://lore.kernel.org/r/20200914191707.380444-1-jlayton@kernel.org) can't use fscrypt_d_ops, since ceph already has its own dentry_operations. Similarly, ext4 and f2fs support for directories that are both encrypted and casefolded (https://lore.kernel.org/r/20200923010151.69506-1-drosen@google.com) can't use fscrypt_d_ops either, since casefolding requires some dentry operations too. To satisfy both users, we need to move the responsibility of installing the dentry_operations to filesystems. In preparation for this, export fscrypt_d_revalidate() and give it a !CONFIG_FS_ENCRYPTION stub. Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20200924054721.187797-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index f1757e73162d..a8f7a43f031b 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -197,6 +197,7 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ void fscrypt_decrypt_bio(struct bio *bio); @@ -454,6 +455,12 @@ static inline u64 fscrypt_fname_siphash(const struct inode *dir, return 0; } +static inline int fscrypt_d_revalidate(struct dentry *dentry, + unsigned int flags) +{ + return 1; +} + /* bio.c */ static inline void fscrypt_decrypt_bio(struct bio *bio) { -- cgit v1.2.3 From efc68158c429f37d87fd02ee9a26913c78546fc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 25 Sep 2020 23:25:01 +0200 Subject: bpf: change logging calls from verbose() to bpf_log() and use log pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for moving code around, change a bunch of references to env->log (and the verbose() logging helper) to use bpf_log() and a direct pointer to struct bpf_verifier_log. While we're touching the function signature, mark the 'prog' argument to bpf_check_type_match() as const. Also enhance the bpf_verifier_log_needed() check to handle NULL pointers for the log struct so we can re-use the code with logging disabled. Acked-by: Andrii Nakryiko Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- include/linux/bpf_verifier.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index db6dcdee7933..5176726f4f03 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1420,7 +1420,7 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); -int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, +int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); struct bpf_prog *bpf_prog_by_id(u32 id); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2bb48a2c4d08..7bc9276c4ef4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -347,8 +347,9 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { - return (log->level && log->ubuf && !bpf_verifier_log_full(log)) || - log->level == BPF_LOG_KERNEL; + return log && + ((log->level && log->ubuf && !bpf_verifier_log_full(log)) || + log->level == BPF_LOG_KERNEL); } #define BPF_MAX_SUBPROGS 256 -- cgit v1.2.3 From f7b12b6fea00988496b7606d4964cd77beef46a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 25 Sep 2020 23:25:02 +0200 Subject: bpf: verifier: refactor check_attach_btf_id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check_attach_btf_id() function really does three things: 1. It performs a bunch of checks on the program to ensure that the attachment is valid. 2. It stores a bunch of state about the attachment being requested in the verifier environment and struct bpf_prog objects. 3. It allocates a trampoline for the attachment. This patch splits out (1.) and (3.) into separate functions which will perform the checks, but return the computed values instead of directly modifying the environment. This is done in preparation for reusing the checks when the actual attachment is happening, which will allow tracing programs to have multiple (compatible) attachments. This also fixes a bug where a bunch of checks were skipped if a trampoline already existed for the tracing target. Fixes: 6ba43b761c41 ("bpf: Attachment verification for BPF_MODIFY_RETURN") Fixes: 1e6c62a88215 ("bpf: Introduce sleepable BPF programs") Acked-by: Andrii Nakryiko Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 19 ++++++++++++++----- include/linux/bpf_verifier.h | 13 +++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5176726f4f03..b89a30764069 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -606,6 +606,13 @@ struct bpf_trampoline { struct bpf_ksym ksym; }; +struct bpf_attach_target_info { + struct btf_func_model fmodel; + long tgt_addr; + const char *tgt_name; + const struct btf_type *tgt_type; +}; + #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ struct bpf_dispatcher_prog { @@ -633,9 +640,10 @@ static __always_inline unsigned int bpf_dispatcher_nop_func( return bpf_func(ctx, insnsi); } #ifdef CONFIG_BPF_JIT -struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); +struct bpf_trampoline *bpf_trampoline_get(u64 key, + struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ @@ -680,10 +688,6 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); #else -static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) -{ - return NULL; -} static inline int bpf_trampoline_link_prog(struct bpf_prog *prog) { return -ENOTSUPP; @@ -692,6 +696,11 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) { return -ENOTSUPP; } +static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, + struct bpf_attach_target_info *tgt_info) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) #define DECLARE_BPF_DISPATCHER(name) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7bc9276c4ef4..363b4f1c562a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -450,4 +450,17 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); int check_ctx_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); +/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ +static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, + u32 btf_id) +{ + return tgt_prog ? (((u64)tgt_prog->aux->id) << 32 | btf_id) : btf_id; +} + +int bpf_check_attach_target(struct bpf_verifier_log *log, + const struct bpf_prog *prog, + const struct bpf_prog *tgt_prog, + u32 btf_id, + struct bpf_attach_target_info *tgt_info); + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 76654e67f3a01c50dc13dd6dea75e58943413956 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:03 +0100 Subject: bpf: Provide function to get vmlinux BTF information It will be used later for BPF structure display support Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-2-git-send-email-alan.maguire@oracle.com --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b89a30764069..e620a4b1290f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1364,6 +1364,8 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, union bpf_attr __user *uattr); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); +struct btf *bpf_get_btf_vmlinux(void); + /* Map specifics */ struct xdp_buff; struct sk_buff; -- cgit v1.2.3 From 31d0bc81637d8d974a6dad9827b765b4b70c89d7 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:04 +0100 Subject: bpf: Move to generic BTF show support, apply it to seq files/strings generalize the "seq_show" seq file support in btf.c to support a generic show callback of which we support two instances; the current seq file show, and a show with snprintf() behaviour which instead writes the type data to a supplied string. Both classes of show function call btf_type_show() with different targets; the seq file or the string to be written. In the string case we need to track additional data - length left in string to write and length to return that we would have written (a la snprintf). By default show will display type information, field members and their types and values etc, and the information is indented based upon structure depth. Zeroed fields are omitted. Show however supports flags which modify its behaviour: BTF_SHOW_COMPACT - suppress newline/indent. BTF_SHOW_NONAME - suppress show of type and member names. BTF_SHOW_PTR_RAW - do not obfuscate pointer values. BTF_SHOW_UNSAFE - do not copy data to safe buffer before display. BTF_SHOW_ZERO - show zeroed values (by default they are not shown). Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-3-git-send-email-alan.maguire@oracle.com --- include/linux/btf.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index a9af5e7a7ece..d0f5d3c9ec3d 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -13,6 +13,7 @@ struct btf; struct btf_member; struct btf_type; union bpf_attr; +struct btf_show; extern const struct file_operations btf_fops; @@ -46,8 +47,43 @@ int btf_get_info_by_fd(const struct btf *btf, const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *type_id, u32 *ret_size); + +/* + * Options to control show behaviour. + * - BTF_SHOW_COMPACT: no formatting around type information + * - BTF_SHOW_NONAME: no struct/union member names/types + * - BTF_SHOW_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_SHOW_ZERO: show zero-valued struct/union members; they + * are not displayed by default + * - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read + * data before displaying it. + */ +#define BTF_SHOW_COMPACT (1ULL << 0) +#define BTF_SHOW_NONAME (1ULL << 1) +#define BTF_SHOW_PTR_RAW (1ULL << 2) +#define BTF_SHOW_ZERO (1ULL << 3) +#define BTF_SHOW_UNSAFE (1ULL << 4) + void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); + +/* + * Copy len bytes of string representation of obj of BTF type_id into buf. + * + * @btf: struct btf object + * @type_id: type id of type obj points to + * @obj: pointer to typed data + * @buf: buffer to write to + * @len: maximum length to write to buf + * @flags: show options (see above) + * + * Return: length that would have been/was copied as per snprintf, or + * negative error. + */ +int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, + char *buf, int len, u64 flags); + int btf_get_fd_by_id(u32 id); u32 btf_id(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, -- cgit v1.2.3 From c4d0bfb45068d853a478b9067a95969b1886a30f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:05 +0100 Subject: bpf: Add bpf_snprintf_btf helper A helper is added to support tracing kernel type information in BPF using the BPF Type Format (BTF). Its signature is long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags); struct btf_ptr * specifies - a pointer to the data to be traced - the BTF id of the type of data pointed to - a flags field is provided for future use; these flags are not to be confused with the BTF_F_* flags below that control how the btf_ptr is displayed; the flags member of the struct btf_ptr may be used to disambiguate types in kernel versus module BTF, etc; the main distinction is the flags relate to the type and information needed in identifying it; not how it is displayed. For example a BPF program with a struct sk_buff *skb could do the following: static struct btf_ptr b = { }; b.ptr = skb; b.type_id = __builtin_btf_type_id(struct sk_buff, 1); bpf_snprintf_btf(str, sizeof(str), &b, sizeof(b), 0, 0); Default output looks like this: (struct sk_buff){ .transport_header = (__u16)65535, .mac_header = (__u16)65535, .end = (sk_buff_data_t)192, .head = (unsigned char *)0x000000007524fd8b, .data = (unsigned char *)0x000000007524fd8b, .truesize = (unsigned int)768, .users = (refcount_t){ .refs = (atomic_t){ .counter = (int)1, }, }, } Flags modifying display are as follows: - BTF_F_COMPACT: no formatting around type information - BTF_F_NONAME: no struct/union member names/types - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; equivalent to %px. - BTF_F_ZERO: show zero-valued struct/union members; they are not displayed by default Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-4-git-send-email-alan.maguire@oracle.com --- include/linux/bpf.h | 1 + include/linux/btf.h | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e620a4b1290f..768b533ba48e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1822,6 +1822,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; +extern const struct bpf_func_proto bpf_snprintf_btf_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/btf.h b/include/linux/btf.h index d0f5d3c9ec3d..3e5cdc2ba963 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -6,6 +6,7 @@ #include #include +#include #define BTF_TYPE_EMIT(type) ((void)(type *)0) @@ -59,10 +60,10 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, * - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read * data before displaying it. */ -#define BTF_SHOW_COMPACT (1ULL << 0) -#define BTF_SHOW_NONAME (1ULL << 1) -#define BTF_SHOW_PTR_RAW (1ULL << 2) -#define BTF_SHOW_ZERO (1ULL << 3) +#define BTF_SHOW_COMPACT BTF_F_COMPACT +#define BTF_SHOW_NONAME BTF_F_NONAME +#define BTF_SHOW_PTR_RAW BTF_F_PTR_RAW +#define BTF_SHOW_ZERO BTF_F_ZERO #define BTF_SHOW_UNSAFE (1ULL << 4) void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, -- cgit v1.2.3 From eb411377aed9e27835e77ee0710ee8f4649958f3 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:09 +0100 Subject: bpf: Add bpf_seq_printf_btf helper A helper is added to allow seq file writing of kernel data structures using vmlinux BTF. Its signature is long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags); Flags and struct btf_ptr definitions/use are identical to the bpf_snprintf_btf helper, and the helper returns 0 on success or a negative error value. Suggested-by: Alexei Starovoitov Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-8-git-send-email-alan.maguire@oracle.com --- include/linux/btf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 3e5cdc2ba963..024e16ff7dcc 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -68,6 +68,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); +int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, void *obj, + struct seq_file *m, u64 flags); /* * Copy len bytes of string representation of obj of BTF type_id into buf. -- cgit v1.2.3 From b4c5f83ae3f3e2b3239751c304e424eace62448b Mon Sep 17 00:00:00 2001 From: Rusaimi Amira Ruslan Date: Mon, 28 Sep 2020 18:12:12 +0800 Subject: stmmac: intel: Adding ref clock 1us tic for LPI cntr Adding reference clock (1us tic) for all LPI timer on Intel platforms. The reference clock is derived from ptp clk. This also enables all LPI counter. Signed-off-by: Rusaimi Amira Ruslan Signed-off-by: Voon Weifeng Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 00e83c877496..628e28903b8b 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -200,5 +200,6 @@ struct plat_stmmacenet_data { int has_xgmac; bool vlan_fail_q_en; u8 vlan_fail_q; + unsigned int eee_usecs_rate; }; #endif -- cgit v1.2.3 From 7b38b7b0427df70237e3c9c73f2db6b99be2b4b9 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 8 Sep 2020 19:24:45 +0900 Subject: PM / devfreq: Add devfreq_get_devfreq_by_node function Split off part of devfreq_get_devfreq_by_phandle into a separate function. This allows callers to fetch devfreq instances by enumerating devicetree instead of explicit phandles. Acked-by: Krzysztof Kozlowski Reviewed-by: Lukasz Luba Signed-off-by: Leonard Crestez [cw00.choi: Export devfreq_get_devfreq_by_node function and add function to devfreq.h when CONFIG_PM_DEVFREQ is enabled.] Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 12782fbb4c25..eb971b8e5051 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -261,6 +261,7 @@ void devm_devfreq_unregister_notifier(struct device *dev, struct devfreq *devfreq, struct notifier_block *nb, unsigned int list); +struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node); struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) @@ -414,6 +415,11 @@ static inline void devm_devfreq_unregister_notifier(struct device *dev, { } +static inline struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node) +{ + return ERR_PTR(-ENODEV); +} + static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) { -- cgit v1.2.3 From 86d90fd95bbc3b3fdc2ef0507b7324cd1d0a358b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 8 Sep 2020 19:24:46 +0900 Subject: PM / devfreq: Change prototype of devfreq_get_devfreq_by_phandle function Previously, devfreq core support 'devfreq' property in order to get the devfreq device by phandle. But, 'devfreq' property name is not proper on devicetree binding because this name doesn't mean the any h/w attribute. The devfreq core hand over the right to decide the property name for getting the devfreq device on devicetree. Each devfreq driver will decide the property name on devicetree binding and pass the their own property name to devfreq_get_devfreq_by_phandle function. Acked-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index eb971b8e5051..2f4a74efa6be 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -262,7 +262,8 @@ void devm_devfreq_unregister_notifier(struct device *dev, struct notifier_block *nb, unsigned int list); struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node); -struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); +struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, + const char *phandle_name, int index); #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** @@ -421,7 +422,7 @@ static inline struct devfreq *devfreq_get_devfreq_by_node(struct device_node *no } static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, - int index) + const char *phandle_name, int index) { return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From 02bdbf7d09c059b16047e3d4a05e6d584dd993b6 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 8 Sep 2020 19:24:47 +0900 Subject: PM / devfreq: event: Change prototype of devfreq_event_get_edev_by_phandle function Previously, devfreq core support 'devfreq-events' property in order to get the devfreq-event device by phandle. But, 'devfreq-events' property name is not proper on devicetree binding because this name doesn't mean the any h/w attribute. The devfreq-event core hand over the rights to decide the property name for getting the devfreq-event device on devicetree. Each devfreq-event driver will decide the property name on devicetree binding and then pass the their own property name to devfreq_event_get_edev_by_phandle function. And change the prototype of devfreq_event_get_edev_count function because of used deprecated 'devfreq-events' property. Acked-by: Krzysztof Kozlowski Signed-off-by: Chanwoo Choi --- include/linux/devfreq-event.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h index f14f17f8cb7f..4a50a5c71a5f 100644 --- a/include/linux/devfreq-event.h +++ b/include/linux/devfreq-event.h @@ -106,8 +106,11 @@ extern int devfreq_event_get_event(struct devfreq_event_dev *edev, struct devfreq_event_data *edata); extern int devfreq_event_reset_event(struct devfreq_event_dev *edev); extern struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( - struct device *dev, int index); -extern int devfreq_event_get_edev_count(struct device *dev); + struct device *dev, + const char *phandle_name, + int index); +extern int devfreq_event_get_edev_count(struct device *dev, + const char *phandle_name); extern struct devfreq_event_dev *devfreq_event_add_edev(struct device *dev, struct devfreq_event_desc *desc); extern int devfreq_event_remove_edev(struct devfreq_event_dev *edev); @@ -152,12 +155,15 @@ static inline int devfreq_event_reset_event(struct devfreq_event_dev *edev) } static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( - struct device *dev, int index) + struct device *dev, + const char *phandle_name, + int index) { return ERR_PTR(-EINVAL); } -static inline int devfreq_event_get_edev_count(struct device *dev) +static inline int devfreq_event_get_edev_count(struct device *dev, + const char *phandle_name) { return -EINVAL; } -- cgit v1.2.3 From 01fd30da0474d5528d96a154cb50ce3b0352fd19 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:55:58 +0200 Subject: dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new type struct dma_buf_map represents a mapping of dma-buf memory into kernel space. It contains a flag, is_iomem, that signals users to access the mapped memory with I/O operations instead of regular loads and stores. It was assumed that DMA buffer memory can be accessed with regular load and store operations. Some architectures, such as sparc64, require the use of I/O operations to access dma-map buffers that are located in I/O memory. Providing struct dma_buf_map allows drivers to implement this. This was specifically a problem when refreshing the graphics framebuffer on such systems. [1] As the first step, struct dma_buf stores an instance of struct dma_buf_map internally. Afterwards, dma-buf's vmap and vunmap interfaces are be converted. Finally, affected drivers can be fixed. v3: * moved documentation into separate patch * test for NULL pointers with ! [1] https://lore.kernel.org/dri-devel/20200725191012.GA434957@ravnborg.org/ Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Daniel Vetter Acked-by: Sumit Semwal Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-2-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 82 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/dma-buf.h | 3 +- 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 include/linux/dma-buf-map.h (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h new file mode 100644 index 000000000000..00143c88feb6 --- /dev/null +++ b/include/linux/dma-buf-map.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer to dma-buf-mapped memory, plus helpers. + */ + +#ifndef __DMA_BUF_MAP_H__ +#define __DMA_BUF_MAP_H__ + +#include + +/** + * struct dma_buf_map - Pointer to vmap'ed dma-buf memory. + * @vaddr_iomem: The buffer's address if in I/O memory + * @vaddr: The buffer's address if in system memory + * @is_iomem: True if the dma-buf memory is located in I/O + * memory, or false otherwise. + */ +struct dma_buf_map { + union { + void __iomem *vaddr_iomem; + void *vaddr; + }; + bool is_iomem; +}; + +/* API transition helper */ +static inline bool dma_buf_map_is_vaddr(const struct dma_buf_map *map, const void *vaddr) +{ + return !map->is_iomem && (map->vaddr == vaddr); +} + +/** + * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL + * @map: The dma-buf mapping structure + * + * Depending on the state of struct dma_buf_map.is_iomem, tests if the + * mapping is NULL. + * + * Returns: + * True if the mapping is NULL, or false otherwise. + */ +static inline bool dma_buf_map_is_null(const struct dma_buf_map *map) +{ + if (map->is_iomem) + return !map->vaddr_iomem; + return !map->vaddr; +} + +/** + * dma_buf_map_is_set - Tests is the dma-buf mapping has been set + * @map: The dma-buf mapping structure + * + * Depending on the state of struct dma_buf_map.is_iomem, tests if the + * mapping has been set. + * + * Returns: + * True if the mapping is been set, or false otherwise. + */ +static inline bool dma_buf_map_is_set(const struct dma_buf_map *map) +{ + return !dma_buf_map_is_null(map); +} + +/** + * dma_buf_map_clear - Clears a dma-buf mapping structure + * @map: The dma-buf mapping structure + * + * Clears all fields to zero; including struct dma_buf_map.is_iomem. So + * mapping structures that were set to point to I/O memory are reset for + * system memory. Pointers are cleared to NULL. This is the default. + */ +static inline void dma_buf_map_clear(struct dma_buf_map *map) +{ + if (map->is_iomem) { + map->vaddr_iomem = NULL; + map->is_iomem = false; + } else { + map->vaddr = NULL; + } +} + +#endif /* __DMA_BUF_MAP_H__ */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 957b398d30e5..fcc2ddfb6d18 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -13,6 +13,7 @@ #ifndef __DMA_BUF_H__ #define __DMA_BUF_H__ +#include #include #include #include @@ -309,7 +310,7 @@ struct dma_buf { const struct dma_buf_ops *ops; struct mutex lock; unsigned vmapping_counter; - void *vmap_ptr; + struct dma_buf_map vmap_ptr; const char *exp_name; const char *name; spinlock_t name_lock; -- cgit v1.2.3 From 6619ccf1bb1d0ebb071f758111efa83918b216fc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:55:59 +0200 Subject: dma-buf: Use struct dma_buf_map in dma_buf_vmap() interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates dma_buf_vmap() and dma-buf's vmap callback to use struct dma_buf_map. The interfaces used to return a buffer address. This address now gets stored in an instance of the structure that is given as an additional argument. The functions return an errno code on errors. Users of the functions are updated accordingly. This is only an interface change. It is currently expected that dma-buf memory can be accessed with system memory load/store operations. v3: * update fastrpc driver (kernel test robot) v2: * always clear map parameter in dma_buf_vmap() (Daniel) * include dma-buf-heaps and i915 selftests (kernel test robot) Signed-off-by: Thomas Zimmermann Acked-by: Sumit Semwal Acked-by: Christian König Acked-by: Daniel Vetter Acked-by: Tomasz Figa Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-3-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 13 +++++++++++++ include/linux/dma-buf.h | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 00143c88feb6..5ae732d373eb 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -23,6 +23,19 @@ struct dma_buf_map { bool is_iomem; }; +/** + * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory + * @map: The dma-buf mapping structure + * @vaddr: A system-memory address + * + * Sets the address and clears the I/O-memory flag. + */ +static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) +{ + map->vaddr = vaddr; + map->is_iomem = false; +} + /* API transition helper */ static inline bool dma_buf_map_is_vaddr(const struct dma_buf_map *map, const void *vaddr) { diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index fcc2ddfb6d18..7237997cfa38 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -266,7 +266,7 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); - void *(*vmap)(struct dma_buf *); + int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); void (*vunmap)(struct dma_buf *, void *vaddr); }; @@ -503,6 +503,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf, int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); -void *dma_buf_vmap(struct dma_buf *); -void dma_buf_vunmap(struct dma_buf *, void *vaddr); +int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); +void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr); #endif /* __DMA_BUF_H__ */ -- cgit v1.2.3 From 20e76f1a70596590dec32a5d1f598fba04859526 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:56:00 +0200 Subject: dma-buf: Use struct dma_buf_map in dma_buf_vunmap() interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates dma_buf_vunmap() and dma-buf's vunmap callback to use struct dma_buf_map. The interfaces used to receive a buffer address. This address is now given in an instance of the structure. Users of the functions are updated accordingly. This is only an interface change. It is currently expected that dma-buf memory can be accessed with system memory load/store operations. v2: * include dma-buf-heaps and i915 selftests (kernel test robot) * initialize cma_obj before using it in drm_gem_cma_free_object() (kernel test robot) Signed-off-by: Thomas Zimmermann Acked-by: Sumit Semwal Acked-by: Christian König Acked-by: Daniel Vetter Acked-by: Tomasz Figa Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-4-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 32 +++++++++++++++++++++++++++++--- include/linux/dma-buf.h | 4 ++-- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 5ae732d373eb..c173a4abf4ba 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -23,6 +23,16 @@ struct dma_buf_map { bool is_iomem; }; +/** + * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory + * @vaddr: A system-memory address + */ +#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \ + { \ + .vaddr = (vaddr_), \ + .is_iomem = false, \ + } + /** * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory * @map: The dma-buf mapping structure @@ -36,10 +46,26 @@ static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) map->is_iomem = false; } -/* API transition helper */ -static inline bool dma_buf_map_is_vaddr(const struct dma_buf_map *map, const void *vaddr) +/** + * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality + * @lhs: The dma-buf mapping structure + * @rhs: A dma-buf mapping structure to compare with + * + * Two dma-buf mapping structures are equal if they both refer to the same type of memory + * and to the same address within that memory. + * + * Returns: + * True is both structures are equal, or false otherwise. + */ +static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs, + const struct dma_buf_map *rhs) { - return !map->is_iomem && (map->vaddr == vaddr); + if (lhs->is_iomem != rhs->is_iomem) + return false; + else if (lhs->is_iomem) + return lhs->vaddr_iomem == rhs->vaddr_iomem; + else + return lhs->vaddr == rhs->vaddr; } /** diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 7237997cfa38..cf77cc15f4ba 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -267,7 +267,7 @@ struct dma_buf_ops { int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); - void (*vunmap)(struct dma_buf *, void *vaddr); + void (*vunmap)(struct dma_buf *dmabuf, struct dma_buf_map *map); }; /** @@ -504,5 +504,5 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf, int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map); -void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr); +void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map); #endif /* __DMA_BUF_H__ */ -- cgit v1.2.3 From ccc22d41bd9acec58cdc7c3b012e1887aba40af4 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Sep 2020 13:56:01 +0200 Subject: dma-buf: Document struct dma_buf_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds struct dma_buf_map and its helpers to the documentation. A short tutorial is included. v3: * update documentation in a separate patch * expand docs (Daniel) * carry-over acks from patch 1 Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Daniel Vetter Acked-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20200925115601.23955-5-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 72 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index c173a4abf4ba..fd1aba545fdf 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -8,6 +8,78 @@ #include +/** + * DOC: overview + * + * Calling dma-buf's vmap operation returns a pointer to the buffer's memory. + * Depending on the location of the buffer, users may have to access it with + * I/O operations or memory load/store operations. For example, copying to + * system memory could be done with memcpy(), copying to I/O memory would be + * done with memcpy_toio(). + * + * .. code-block:: c + * + * void *vaddr = ...; // pointer to system memory + * memcpy(vaddr, src, len); + * + * void *vaddr_iomem = ...; // pointer to I/O memory + * memcpy_toio(vaddr, _iomem, src, len); + * + * When using dma-buf's vmap operation, the returned pointer is encoded as + * :c:type:`struct dma_buf_map `. + * :c:type:`struct dma_buf_map ` stores the buffer's address in + * system or I/O memory and a flag that signals the required method of + * accessing the buffer. Use the returned instance and the helper functions + * to access the buffer's memory in the correct way. + * + * Open-coding access to :c:type:`struct dma_buf_map ` is + * considered bad style. Rather then accessing its fields directly, use one + * of the provided helper functions, or implement your own. For example, + * instances of :c:type:`struct dma_buf_map ` can be initialized + * statically with DMA_BUF_MAP_INIT_VADDR(), or at runtime with + * dma_buf_map_set_vaddr(). These helpers will set an address in system memory. + * + * .. code-block:: c + * + * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf); + * + * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); + * + * Test if a mapping is valid with either dma_buf_map_is_set() or + * dma_buf_map_is_null(). + * + * .. code-block:: c + * + * if (dma_buf_map_is_set(&map) != dma_buf_map_is_null(&map)) + * // always true + * + * Instances of :c:type:`struct dma_buf_map ` can be compared + * for equality with dma_buf_map_is_equal(). Mappings the point to different + * memory spaces, system or I/O, are never equal. That's even true if both + * spaces are located in the same address space, both mappings contain the + * same address value, or both mappings refer to NULL. + * + * .. code-block:: c + * + * struct dma_buf_map sys_map; // refers to system memory + * struct dma_buf_map io_map; // refers to I/O memory + * + * if (dma_buf_map_is_equal(&sys_map, &io_map)) + * // always false + * + * Instances of struct dma_buf_map do not have to be cleaned up, but + * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings + * always refer to system memory. + * + * The type :c:type:`struct dma_buf_map ` and its helpers are + * actually independent from the dma-buf infrastructure. When sharing buffers + * among devices, drivers have to know the location of the memory to access + * the buffers in a safe way. :c:type:`struct dma_buf_map ` + * solves this problem for dma-buf and its users. If other drivers or + * sub-systems require similar functionality, the type could be generalized + * and moved to a more prominent header file. + */ + /** * struct dma_buf_map - Pointer to vmap'ed dma-buf memory. * @vaddr_iomem: The buffer's address if in I/O memory -- cgit v1.2.3 From 48404cf57852224c052e2e40d4dc50dd398c7a58 Mon Sep 17 00:00:00 2001 From: Michael Auchter Date: Tue, 22 Sep 2020 09:44:20 -0500 Subject: iio: adc: ad7291: convert to device tree There are no in-tree users of the platform data for this driver, so remove it and convert the driver to use device tree instead. Signed-off-by: Michael Auchter Link: https://lore.kernel.org/r/20200922144422.542669-1-michael.auchter@ni.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7291.h | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 include/linux/platform_data/ad7291.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7291.h b/include/linux/platform_data/ad7291.h deleted file mode 100644 index b1fd1530c9a5..000000000000 --- a/include/linux/platform_data/ad7291.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __IIO_AD7291_H__ -#define __IIO_AD7291_H__ - -/** - * struct ad7291_platform_data - AD7291 platform data - * @use_external_ref: Whether to use an external or internal reference voltage - */ -struct ad7291_platform_data { - bool use_external_ref; -}; - -#endif -- cgit v1.2.3 From 25918a9c641c3c73ae94354605860c06fd73a05e Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 23 Sep 2020 15:18:10 +0300 Subject: iio: buffer-dmaengine: remove non managed alloc/free This is to encourage the use of devm_iio_dmaengine_buffer_alloc(). Currently the managed version of the DMAEngine buffer alloc is the only function used from this part of the framework. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20200923121810.944075-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer-dmaengine.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 0e503db71289..5b502291d6a4 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -10,10 +10,6 @@ struct iio_buffer; struct device; -struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, - const char *channel); -void iio_dmaengine_buffer_free(struct iio_buffer *buffer); - struct iio_buffer *devm_iio_dmaengine_buffer_alloc(struct device *dev, const char *channel); -- cgit v1.2.3 From 2e2366c2d14193d3b95bab1fb484a9377224962b Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 23 Sep 2020 16:03:39 +0300 Subject: iio: cros_ec: unify hw fifo attributes into the core file The intent here is to minimize the use of iio_buffer_set_attrs(). Since we are planning to add support for multiple IIO buffers per IIO device, the issue has to do with: 1. Accessing 'indio_dev->buffer' directly (as is done with 'iio_buffer_set_attrs(indio_dev->buffer, )'). 2. The way that the buffer attributes would get handled or expanded when there are more buffers per IIO device. Current a sysfs kobj_type expands into a 'device' object that expands into an 'iio_dev' object. We will need to change this, so that the sysfs attributes for IIO buffers expand into IIO buffers at some point. Right now, the current IIO framework works fine for the '1 IIO device == 1 IIO buffer' case (that is now). Signed-off-by: Alexandru Ardelean Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200923130339.997902-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/common/cros_ec_sensors_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index caa8bb279a34..c9b80be82440 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -96,7 +96,8 @@ struct platform_device; int cros_ec_sensors_core_init(struct platform_device *pdev, struct iio_dev *indio_dev, bool physical_device, cros_ec_sensors_capture_t trigger_capture, - cros_ec_sensorhub_push_data_cb_t push_data); + cros_ec_sensorhub_push_data_cb_t push_data, + bool has_hw_fifo); irqreturn_t cros_ec_sensors_capture(int irq, void *p); int cros_ec_sensors_push_data(struct iio_dev *indio_dev, @@ -125,6 +126,5 @@ extern const struct dev_pm_ops cros_ec_sensors_pm_ops; /* List of extended channel specification for all sensors. */ extern const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[]; -extern const struct attribute *cros_ec_sensor_fifo_attributes[]; #endif /* __CROS_EC_SENSORS_CORE_H */ -- cgit v1.2.3 From 232f4eb6393f42f7f9418560ae9228e747fc6faf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Sep 2020 09:56:14 +0200 Subject: efi: pstore: disentangle from deprecated efivars module The EFI pstore implementation relies on the 'efivars' abstraction, which encapsulates the EFI variable store in a way that can be overridden by other backing stores, like the Google SMI one. On top of that, the EFI pstore implementation also relies on the efivars.ko module, which is a separate layer built on top of the 'efivars' abstraction that exposes the [deprecated] sysfs entries for each variable that exists in the backing store. Since the efivars.ko module is deprecated, and all users appear to have moved to the efivarfs file system instead, let's prepare for its removal, by removing EFI pstore's dependency on it. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4a2332f146eb..7066c11ab82f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -986,8 +986,6 @@ struct efivar_entry { bool deleting; }; -extern struct list_head efivar_sysfs_list; - static inline void efivar_unregister(struct efivar_entry *var) { @@ -1045,8 +1043,6 @@ void efivar_run_worker(void); #if defined(CONFIG_EFI_VARS) || defined(CONFIG_EFI_VARS_MODULE) int efivars_sysfs_init(void); -#define EFIVARS_DATA_SIZE_MAX 1024 - #endif /* CONFIG_EFI_VARS */ extern bool efi_capsule_pending(int *reset_type); -- cgit v1.2.3 From c9b51a2dbfe7f47643e133bf48e1bf28f1b85d2a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Sep 2020 10:07:49 +0200 Subject: efi: pstore: move workqueue handling out of efivars The worker thread that gets kicked off to sync the state of the EFI variable list is only used by the EFI pstore implementation, and is defined in its source file. So let's move its scheduling there as well. Since our efivar_init() scan will bail on duplicate entries, there is no need to disable the workqueue like we did before, so we can run it unconditionally. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7066c11ab82f..ab8c80331217 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1037,9 +1037,6 @@ bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, size_t len); -extern struct work_struct efivar_work; -void efivar_run_worker(void); - #if defined(CONFIG_EFI_VARS) || defined(CONFIG_EFI_VARS_MODULE) int efivars_sysfs_init(void); -- cgit v1.2.3 From 5d3c8617ccee6387ba73a5dba77fb9dc21cb85f4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Sep 2020 10:13:07 +0200 Subject: efi: efivars: un-export efivars_sysfs_init() efivars_sysfs_init() is only used locally in the source file that defines it, so make it static and unexport it. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index ab8c80331217..4c8dae0b5c75 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1037,10 +1037,6 @@ bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, size_t len); -#if defined(CONFIG_EFI_VARS) || defined(CONFIG_EFI_VARS_MODULE) -int efivars_sysfs_init(void); - -#endif /* CONFIG_EFI_VARS */ extern bool efi_capsule_pending(int *reset_type); extern int efi_capsule_supported(efi_guid_t guid, u32 flags, -- cgit v1.2.3 From a5d02e901e6dadd7dd60fafb6448a822a47430ff Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Fri, 31 Jul 2020 01:14:16 +0530 Subject: PCI/PM: Remove unused pcibios_pm_ops The "struct dev_pm_ops pcibios_pm_ops", declared in include/linux/pci.h and defined in drivers/pci/pci-driver.c, provided arch-specific hooks when a PCI device was doing a hibernate transition. 394216275c7d ("s390: remove broken hibernate / power management support") removed the last use of pcibios_pm_ops, so remove it completely. [bhelgaas: drop unused "error"] Link: https://lore.kernel.org/r/20200730194416.1029509-1-vaibhavgupta40@gmail.com Reported-by: Bjorn Helgaas Signed-off-by: Vaibhav Gupta Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 835530605c0d..c9e169c4e216 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2034,10 +2034,6 @@ int pcibios_alloc_irq(struct pci_dev *dev); void pcibios_free_irq(struct pci_dev *dev); resource_size_t pcibios_default_alignment(void); -#ifdef CONFIG_HIBERNATE_CALLBACKS -extern struct dev_pm_ops pcibios_pm_ops; -#endif - #if defined(CONFIG_PCI_MMCONFIG) || defined(CONFIG_ACPI_MCFG) void __init pci_mmcfg_early_init(void); void __init pci_mmcfg_late_init(void); -- cgit v1.2.3 From 3789af9a13e5561738c0f2114e3a5e22c843ca3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 30 Jul 2020 21:08:48 +0000 Subject: PCI/PM: Rename pci_dev.d3_delay to d3hot_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI devices support two variants of the D3 power state: D3hot (main power present) D3cold (main power removed). Previously struct pci_dev contained: unsigned int d3_delay; /* D3->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ "d3_delay" refers specifically to the D3hot state. Rename it to "d3hot_delay" to avoid ambiguity and align with the ACPI "_DSM for Specifying Device Readiness Durations" in the PCI Firmware spec r3.2, sec 4.6.9. There is no change to the functionality. Link: https://lore.kernel.org/r/20200730210848.1578826-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c9e169c4e216..bea1a03faab6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -373,7 +373,7 @@ struct pci_dev { user sysfs */ unsigned int clear_retrain_link:1; /* Need to clear Retrain Link bit manually */ - unsigned int d3_delay; /* D3->D0 transition time in ms */ + unsigned int d3hot_delay; /* D3hot->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM -- cgit v1.2.3 From 3aac1ead5eb6b76f3d2b8d111e6de1c2de23fb34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 29 Sep 2020 14:45:50 +0200 Subject: bpf: Move prog->aux->linked_prog and trampoline into bpf_link on attach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for allowing multiple attachments of freplace programs, move the references to the target program and trampoline into the bpf_tracing_link structure when that is created. To do this atomically, introduce a new mutex in prog->aux to protect writing to the two pointers to target prog and trampoline, and rename the members to make it clear that they are related. With this change, it is no longer possible to attach the same tracing program multiple times (detaching in-between), since the reference from the tracing program to the target disappears on the first attach. However, since the next patch will let the caller supply an attach target, that will also make it possible to attach to the same place multiple times. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355059.48470.2503076992210324984.stgit@toke.dk --- include/linux/bpf.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 768b533ba48e..839dd8670a7a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -640,8 +640,8 @@ static __always_inline unsigned int bpf_dispatcher_nop_func( return bpf_func(ctx, insnsi); } #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_prog *prog); -int bpf_trampoline_unlink_prog(struct bpf_prog *prog); +int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); +int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -688,11 +688,13 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); #else -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog) +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, + struct bpf_trampoline *tr) { return -ENOTSUPP; } -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, + struct bpf_trampoline *tr) { return -ENOTSUPP; } @@ -763,7 +765,9 @@ struct bpf_prog_aux { u32 max_rdonly_access; u32 max_rdwr_access; const struct bpf_ctx_arg_aux *ctx_arg_info; - struct bpf_prog *linked_prog; + struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ + struct bpf_prog *dst_prog; + struct bpf_trampoline *dst_trampoline; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ @@ -771,7 +775,6 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; enum bpf_tramp_prog_type trampoline_prog_type; - struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; -- cgit v1.2.3 From 4a1e7c0c63e02daad751842b7880f9bbcdfb6e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 29 Sep 2020 14:45:51 +0200 Subject: bpf: Support attaching freplace programs to multiple attach points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables support for attaching freplace programs to multiple attach points. It does this by amending the UAPI for bpf_link_Create with a target btf ID that can be used to supply the new attachment point along with the target program fd. The target must be compatible with the target that was supplied at program load time. The implementation reuses the checks that were factored out of check_attach_btf_id() to ensure compatibility between the BTF types of the old and new attachment. If these match, a new bpf_tracing_link will be created for the new attach target, allowing multiple attachments to co-exist simultaneously. The code could theoretically support multiple-attach of other types of tracing programs as well, but since I don't have a use case for any of those, there is no API support for doing so. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355169.48470.17165680973640685368.stgit@toke.dk --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 839dd8670a7a..50e5c4b52bd1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -768,6 +768,8 @@ struct bpf_prog_aux { struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; + enum bpf_prog_type saved_dst_prog_type; + enum bpf_attach_type saved_dst_attach_type; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ -- cgit v1.2.3 From c11171a413385c1a72291cdb4a7435cb189762ab Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 29 Sep 2020 22:25:12 +0200 Subject: net: Add netif_rx_any_context() Quite some drivers make conditional decisions based on in_interrupt() to invoke either netif_rx() or netif_rx_ni(). Conditionals based on in_interrupt() or other variants of preempt count checks in drivers should not exist for various reasons and Linus clearly requested to either split the code pathes or pass an argument to the common functions which provides the context. This is obviously the correct solution, but for some of the affected drivers this needs a major rewrite due to their convoluted structure. As in_interrupt() usage in drivers needs to be phased out, provide netif_rx_any_context() as a stop gap for these drivers. This confines the in_interrupt() conditional to core code which in turn allows to remove the access to this check for driver code and provides one central place to do further modifications once the driver maze is cleaned up. Suggested-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a431c3229cbf..28cfa53daf72 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3785,6 +3785,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); +int netif_rx_any_context(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list(struct list_head *head); -- cgit v1.2.3 From c9ca43d42ed8d5fd635d327a664ed1d8579eb2af Mon Sep 17 00:00:00 2001 From: Paras Sharma Date: Wed, 30 Sep 2020 11:35:26 +0530 Subject: serial: qcom_geni_serial: To correct QUP Version detection logic For QUP IP versions 2.5 and above the oversampling rate is halved from 32 to 16. Commit ce734600545f ("tty: serial: qcom_geni_serial: Update the oversampling rate") is pushed to handle this scenario. But the existing logic is failing to classify QUP Version 3.0 into the correct group ( 2.5 and above). As result Serial Engine clocks are not configured properly for baud rate and garbage data is sampled to FIFOs from the line. So, fix the logic to detect QUP with versions 2.5 and above. Fixes: ce734600545f ("tty: serial: qcom_geni_serial: Update the oversampling rate") Cc: stable Signed-off-by: Paras Sharma Reviewed-by: Akash Asthana Link: https://lore.kernel.org/r/1601445926-23673-1-git-send-email-parashar@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- include/linux/qcom-geni-se.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index 8f385fbe5a0e..1c31f26ccc7a 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -248,6 +248,9 @@ struct geni_se { #define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) #define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) +/* QUP SE VERSION value for major number 2 and minor number 5 */ +#define QUP_SE_VERSION_2_5 0x20050000 + /* * Define bandwidth thresholds that cause the underlying Core 2X interconnect * clock to run at the named frequency. These baseline values are recommended -- cgit v1.2.3 From 714fb2fbe7379d0a413be217194f7af9814f2079 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Thu, 24 Sep 2020 13:42:11 +0530 Subject: mtd: hyperbus: Provide per device private pointer Provide per device private pointer that can be used by controller drivers to store device specific private data. Signed-off-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20200924081214.16934-2-vigneshr@ti.com --- include/linux/mtd/hyperbus.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/hyperbus.h b/include/linux/mtd/hyperbus.h index 2129f7d3b6eb..d8cb1aec826d 100644 --- a/include/linux/mtd/hyperbus.h +++ b/include/linux/mtd/hyperbus.h @@ -20,6 +20,7 @@ enum hyperbus_memtype { * @mtd: pointer to MTD struct * @ctlr: pointer to HyperBus controller struct * @memtype: type of memory device: HyperFlash or HyperRAM + * @priv: pointer to controller specific per device private data */ struct hyperbus_device { @@ -28,6 +29,7 @@ struct hyperbus_device { struct mtd_info *mtd; struct hyperbus_ctlr *ctlr; enum hyperbus_memtype memtype; + void *priv; }; /** -- cgit v1.2.3 From 0fd16012adc0a994a7ce980a78e22e4de6220778 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 29 Sep 2020 12:09:55 +0200 Subject: lib: string_helpers: provide kfree_strarray() There's a common pattern of dynamically allocating an array of char pointers and then also dynamically allocating each string in this array. Provide a helper for freeing such a string array with one call. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- include/linux/string_helpers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 86f150c2a6b6..fa06dcdc481e 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -94,4 +94,6 @@ char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); char *kstrdup_quotable_file(struct file *file, gfp_t gfp); +void kfree_strarray(char **array, size_t n); + #endif -- cgit v1.2.3 From 1e3b37aab958861a9d0c01ff6dbec96a82743701 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:52:05 +0200 Subject: mtd: rawnand: Use the ECC framework OOB layouts No need to have our own in the raw NAND core. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-18-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 967b616c50df..1bbce6fa1b08 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -14,6 +14,7 @@ #define __LINUX_MTD_RAWNAND_H #include +#include #include #include #include @@ -1156,9 +1157,6 @@ struct nand_chip { void *priv; }; -extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; -extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; - static inline struct nand_chip *mtd_to_nand(struct mtd_info *mtd) { return container_of(mtd, struct nand_chip, base.mtd); -- cgit v1.2.3 From d7157ff49a5b5845b37b8f2bf31607f0af295ef1 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:52:07 +0200 Subject: mtd: rawnand: Use the ECC framework user input parsing bits Many helpers are generic to all NAND chips, they should not be raw-NAND specific, so use the generic ones. To avoid moving all the raw NAND core "history" into the generic NAND layer, we keep a part of this parsing in the raw NAND core to ensure backward compatibility. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-20-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 1bbce6fa1b08..671e60948deb 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -81,18 +81,6 @@ struct nand_chip; #define NAND_DATA_IFACE_CHECK_ONLY -1 -/* - * Constants for ECC_MODES - */ -enum nand_ecc_mode { - NAND_ECC_INVALID, - NAND_ECC_NONE, - NAND_ECC_SOFT, - NAND_ECC_HW, - NAND_ECC_HW_SYNDROME, - NAND_ECC_ON_DIE, -}; - /* * Constants for Hardware ECC */ -- cgit v1.2.3 From b5156335ac37f186812090795ed27884a76c3266 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 27 Aug 2020 10:52:08 +0200 Subject: mtd: rawnand: Use the NAND framework user_conf object for ECC flags Instead of storing the ECC flags in chip->ecc.options, use nanddev->ecc.user_conf.flags. There is currently only one to save: NAND_ECC_MAXIMIZE. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200827085208.16276-21-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 671e60948deb..aac07940de09 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -98,7 +98,6 @@ struct nand_chip; * pages and you want to rely on the default implementation. */ #define NAND_ECC_GENERIC_ERASED_CHECK BIT(0) -#define NAND_ECC_MAXIMIZE BIT(1) /* * Option constants for bizarre disfunctionality and real -- cgit v1.2.3 From 4b6ec08fd21ee3179cbfccf3605ad13d9f38b623 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 2 Sep 2020 16:22:59 +0200 Subject: mfd: lp87565: Add LP87524-Q1 variant Add support for the LP87524B/J/P-Q1 Four 4-MHz Buck Converter. This is a variant of the LP87565 having 4 single-phase outputs and up to 10 A of total output current. Signed-off-by: Luca Ceresoli Signed-off-by: Lee Jones --- include/linux/mfd/lp87565.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/lp87565.h b/include/linux/mfd/lp87565.h index 43716aca46fa..d44ddfb6bb63 100644 --- a/include/linux/mfd/lp87565.h +++ b/include/linux/mfd/lp87565.h @@ -14,6 +14,7 @@ enum lp87565_device_type { LP87565_DEVICE_TYPE_UNKNOWN = 0, + LP87565_DEVICE_TYPE_LP87524_Q1, LP87565_DEVICE_TYPE_LP87561_Q1, LP87565_DEVICE_TYPE_LP87565_Q1, }; -- cgit v1.2.3 From 876611c493b10cbb59e0e2143d3e744d0442de63 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Tue, 15 Sep 2020 11:44:21 +0800 Subject: mfd: intel-m10-bmc: Add Intel MAX 10 BMC chip support for Intel FPGA PAC This patch implements the basic functions of the BMC chip for some Intel FPGA PCIe Acceleration Cards (PAC). The BMC is implemented using the Intel MAX 10 CPLD. This BMC chip is connected to the FPGA by a SPI bus. To provide direct register access from the FPGA, the "SPI slave to Avalon Master Bridge" (spi-avmm) IP is integrated in the chip. It converts encoded streams of bytes from the host to the internal register read/write on the Avalon bus. So This driver uses the regmap-spi-avmm for register accessing. Signed-off-by: Xu Yilun Signed-off-by: Wu Hao Signed-off-by: Matthew Gerlach Signed-off-by: Russ Weight Reviewed-by: Tom Rix Signed-off-by: Lee Jones --- include/linux/mfd/intel-m10-bmc.h | 65 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 include/linux/mfd/intel-m10-bmc.h (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h new file mode 100644 index 000000000000..c8ef2f1654a4 --- /dev/null +++ b/include/linux/mfd/intel-m10-bmc.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel MAX 10 Board Management Controller chip. + * + * Copyright (C) 2018-2020 Intel Corporation, Inc. + */ +#ifndef __MFD_INTEL_M10_BMC_H +#define __MFD_INTEL_M10_BMC_H + +#include + +#define M10BMC_LEGACY_SYS_BASE 0x300400 +#define M10BMC_SYS_BASE 0x300800 +#define M10BMC_MEM_END 0x200000fc + +/* Register offset of system registers */ +#define NIOS2_FW_VERSION 0x0 +#define M10BMC_TEST_REG 0x3c +#define M10BMC_BUILD_VER 0x68 +#define M10BMC_VER_MAJOR_MSK GENMASK(23, 16) +#define M10BMC_VER_PCB_INFO_MSK GENMASK(31, 24) +#define M10BMC_VER_LEGACY_INVALID 0xffffffff + +/** + * struct intel_m10bmc - Intel MAX 10 BMC parent driver data structure + * @dev: this device + * @regmap: the regmap used to access registers by m10bmc itself + */ +struct intel_m10bmc { + struct device *dev; + struct regmap *regmap; +}; + +/* + * register access helper functions. + * + * m10bmc_raw_read - read m10bmc register per addr + * m10bmc_sys_read - read m10bmc system register per offset + */ +static inline int +m10bmc_raw_read(struct intel_m10bmc *m10bmc, unsigned int addr, + unsigned int *val) +{ + int ret; + + ret = regmap_read(m10bmc->regmap, addr, val); + if (ret) + dev_err(m10bmc->dev, "fail to read raw reg %x: %d\n", + addr, ret); + + return ret; +} + +/* + * The base of the system registers could be configured by HW developers, and + * in HW SPEC, the base is not added to the addresses of the system registers. + * + * This macro helps to simplify the accessing of the system registers. And if + * the base is reconfigured in HW, SW developers could simply change the + * M10BMC_SYS_BASE accordingly. + */ +#define m10bmc_sys_read(m10bmc, offset, val) \ + m10bmc_raw_read(m10bmc, M10BMC_SYS_BASE + (offset), val) + +#endif /* __MFD_INTEL_M10_BMC_H */ -- cgit v1.2.3 From 38b393fec298569c8ec154c750a107299bc8385d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Sun, 20 Sep 2020 00:15:36 +0200 Subject: leds: tca6507: Absorb platform data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only in-tree usage of this driver is via device-tree. No on else includes linux/leds-tca6507.h, so absorb the definition of platdata structure. Signed-off-by: Marek Behún Cc: NeilBrown Reviewed-by: Linus Walleij Tested-by: H. Nikolaus Schaller Signed-off-by: Pavel Machek --- include/linux/leds-tca6507.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/leds-tca6507.h (limited to 'include/linux') diff --git a/include/linux/leds-tca6507.h b/include/linux/leds-tca6507.h deleted file mode 100644 index 50d330ed1100..000000000000 --- a/include/linux/leds-tca6507.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * TCA6507 LED chip driver. - * - * Copyright (C) 2011 Neil Brown - */ - -#ifndef __LINUX_TCA6507_H -#define __LINUX_TCA6507_H -#include - -struct tca6507_platform_data { - struct led_platform_data leds; -#ifdef CONFIG_GPIOLIB - int gpio_base; - void (*setup)(unsigned gpio_base, unsigned ngpio); -#endif -}; - -#define TCA6507_MAKE_GPIO 1 -#endif /* __LINUX_TCA6507_H*/ -- cgit v1.2.3 From 85fc8efe85d405b80904f73e4e23184a84283753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Sun, 20 Sep 2020 02:24:59 +0200 Subject: leds: pca963x: register LEDs immediately after parsing, get rid of platdata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register LEDs immediately after parsing their properties. This allows us to get rid of platdata, and since no one in tree uses header linux/platform_data/leds-pca963x.h, remove it. Signed-off-by: Marek Behún Cc: Peter Meerwald Cc: Ricardo Ribalda Cc: Zahari Petkov Signed-off-by: Pavel Machek --- include/linux/platform_data/leds-pca963x.h | 35 ------------------------------ 1 file changed, 35 deletions(-) delete mode 100644 include/linux/platform_data/leds-pca963x.h (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-pca963x.h b/include/linux/platform_data/leds-pca963x.h deleted file mode 100644 index 6091337ce4bf..000000000000 --- a/include/linux/platform_data/leds-pca963x.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * PCA963X LED chip driver. - * - * Copyright 2012 bct electronic GmbH - * Copyright 2013 Qtechnology A/S - */ - -#ifndef __LINUX_PCA963X_H -#define __LINUX_PCA963X_H -#include - -enum pca963x_outdrv { - PCA963X_OPEN_DRAIN, - PCA963X_TOTEM_POLE, /* aka push-pull */ -}; - -enum pca963x_blink_type { - PCA963X_SW_BLINK, - PCA963X_HW_BLINK, -}; - -enum pca963x_direction { - PCA963X_NORMAL, - PCA963X_INVERTED, -}; - -struct pca963x_platform_data { - struct led_platform_data leds; - enum pca963x_outdrv outdrv; - enum pca963x_blink_type blink_type; - enum pca963x_direction dir; -}; - -#endif /* __LINUX_PCA963X_H*/ -- cgit v1.2.3 From 92acdc58ab11af66fcaef485433fde61b5e32fac Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:16 +0200 Subject: bpf, net: Rework cookie generator as per-cpu one With its use in BPF, the cookie generator can be called very frequently in particular when used out of cgroup v2 hooks (e.g. connect / sendmsg) and attached to the root cgroup, for example, when used in v1/v2 mixed environments. In particular, when there's a high churn on sockets in the system there can be many parallel requests to the bpf_get_socket_cookie() and bpf_get_netns_cookie() helpers which then cause contention on the atomic counter. As similarly done in f991bd2e1421 ("fs: introduce a per-cpu last_ino allocator"), add a small helper library that both can use for the 64 bit counters. Given this can be called from different contexts, we also need to deal with potential nested calls even though in practice they are considered extremely rare. One idea as suggested by Eric Dumazet was to use a reverse counter for this situation since we don't expect 64 bit overflows anyways; that way, we can avoid bigger gaps in the 64 bit counter space compared to just batch-wise increase. Even on machines with small number of cores (e.g. 4) the cookie generation shrinks from min/max/med/avg (ns) of 22/50/40/38.9 down to 10/35/14/17.3 when run in parallel from multiple CPUs. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/8a80b8d27d3c49f9a14e1d5213c19d8be87d1dc8.1601477936.git.daniel@iogearbox.net --- include/linux/cookie.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sock_diag.h | 14 ++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 include/linux/cookie.h (limited to 'include/linux') diff --git a/include/linux/cookie.h b/include/linux/cookie.h new file mode 100644 index 000000000000..0c159f585109 --- /dev/null +++ b/include/linux/cookie.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_COOKIE_H +#define __LINUX_COOKIE_H + +#include +#include +#include + +struct pcpu_gen_cookie { + local_t nesting; + u64 last; +} __aligned(16); + +struct gen_cookie { + struct pcpu_gen_cookie __percpu *local; + atomic64_t forward_last ____cacheline_aligned_in_smp; + atomic64_t reverse_last; +}; + +#define COOKIE_LOCAL_BATCH 4096 + +#define DEFINE_COOKIE(name) \ + static DEFINE_PER_CPU(struct pcpu_gen_cookie, __##name); \ + static struct gen_cookie name = { \ + .local = &__##name, \ + .forward_last = ATOMIC64_INIT(0), \ + .reverse_last = ATOMIC64_INIT(0), \ + } + +static __always_inline u64 gen_cookie_next(struct gen_cookie *gc) +{ + struct pcpu_gen_cookie *local = this_cpu_ptr(gc->local); + u64 val; + + if (likely(local_inc_return(&local->nesting) == 1)) { + val = local->last; + if (__is_defined(CONFIG_SMP) && + unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { + s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, + &gc->forward_last); + val = next - COOKIE_LOCAL_BATCH; + } + local->last = ++val; + } else { + val = atomic64_dec_return(&gc->reverse_last); + } + local_dec(&local->nesting); + return val; +} + +#endif /* __LINUX_COOKIE_H */ diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 15fe980a27ea..0b9ecd8cf979 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -25,7 +25,19 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -u64 sock_gen_cookie(struct sock *sk); +u64 __sock_gen_cookie(struct sock *sk); + +static inline u64 sock_gen_cookie(struct sock *sk) +{ + u64 cookie; + + preempt_disable(); + cookie = __sock_gen_cookie(sk); + preempt_enable(); + + return cookie; +} + int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); -- cgit v1.2.3 From b4ab31414970a7a03a5d55d75083f2c101a30592 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:17 +0200 Subject: bpf: Add redirect_neigh helper as redirect drop-in Add a redirect_neigh() helper as redirect() drop-in replacement for the xmit side. Main idea for the helper is to be very similar in semantics to the latter just that the skb gets injected into the neighboring subsystem in order to let the stack do the work it knows best anyway to populate the L2 addresses of the packet and then hand over to dev_queue_xmit() as redirect() does. This solves two bigger items: i) skbs don't need to go up to the stack on the host facing veth ingress side for traffic egressing the container to achieve the same for populating L2 which also has the huge advantage that ii) the skb->sk won't get orphaned in ip_rcv_core() when entering the IP routing layer on the host stack. Given that skb->sk neither gets orphaned when crossing the netns as per 9c4c325252c5 ("skbuff: preserve sock reference when scrubbing the skb.") the helper can then push the skbs directly to the phys device where FQ scheduler can do its work and TCP stack gets proper backpressure given we hold on to skb->sk as long as skb is still residing in queues. With the helper used in BPF data path to then push the skb to the phys device, I observed a stable/consistent TCP_STREAM improvement on veth devices for traffic going container -> host -> host -> container from ~10Gbps to ~15Gbps for a single stream in my test environment. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Cc: David Ahern Link: https://lore.kernel.org/bpf/f207de81629e1724899b73b8112e0013be782d35.1601477936.git.daniel@iogearbox.net --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 04a18e01b362..3d0cf3722bb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb) return skb->mac_header != (typeof(skb->mac_header))~0U; } +static inline void skb_unset_mac_header(struct sk_buff *skb) +{ + skb->mac_header = (typeof(skb->mac_header))~0U; +} + static inline void skb_reset_mac_header(struct sk_buff *skb) { skb->mac_header = skb->data - skb->head; -- cgit v1.2.3 From 14c9c014babedc6098bf4cd83c622997867bc0df Mon Sep 17 00:00:00 2001 From: Sean O'Brien Date: Wed, 9 Sep 2020 15:03:04 -0700 Subject: HID: add vivaldi HID driver Add vivaldi HID driver. This driver allows us to read and report the top row layout of keyboards which provide a vendor-defined (Google) HID usage. Signed-off-by: Sean O'Brien Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index c7044a14200e..58684657960b 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -163,6 +163,7 @@ struct hid_item { #define HID_UP_LNVENDOR 0xffa00000 #define HID_UP_SENSOR 0x00200000 #define HID_UP_ASUSVENDOR 0xff310000 +#define HID_UP_GOOGLEVENDOR 0xffd10000 #define HID_USAGE 0x0000ffff @@ -371,6 +372,7 @@ struct hid_item { #define HID_GROUP_LOGITECH_DJ_DEVICE 0x0102 #define HID_GROUP_STEAM 0x0103 #define HID_GROUP_LOGITECH_27MHZ_DEVICE 0x0104 +#define HID_GROUP_VIVALDI 0x0105 /* * HID protocol status -- cgit v1.2.3 From 20c168be684a97b084525906eb7ed017b7f9c0b8 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 30 Sep 2020 12:50:59 +0200 Subject: net: macb: move pdata to private header struct macb_platform_data is only used by macb_pci to register the platform device, move its definition to cadence/macb.h and remove platform_data/macb.h Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- include/linux/platform_data/macb.h | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 include/linux/platform_data/macb.h (limited to 'include/linux') diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h deleted file mode 100644 index aa5b5562d6f7..000000000000 --- a/include/linux/platform_data/macb.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004-2006 Atmel Corporation - */ -#ifndef __MACB_PDATA_H__ -#define __MACB_PDATA_H__ - -#include - -/** - * struct macb_platform_data - platform data for MACB Ethernet - * @pclk: platform clock - * @hclk: AHB clock - */ -struct macb_platform_data { - struct clk *pclk; - struct clk *hclk; -}; - -#endif /* __MACB_PDATA_H__ */ -- cgit v1.2.3 From 0f2122045b946241a9e549c2a76cea54fa58a7ff Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 13 Sep 2020 13:09:39 -0600 Subject: io_uring: don't rely on weak ->files references Grab actual references to the files_struct. To avoid circular references issues due to this, we add a per-task note that keeps track of what io_uring contexts a task has used. When the tasks execs or exits its assigned files, we cancel requests based on this tracking. With that, we can grab proper references to the files table, and no longer need to rely on stashing away ring_fd and ring_file to check if the ring_fd may have been closed. Cc: stable@vger.kernel.org # v5.5+ Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 5 +++++ 2 files changed, 58 insertions(+) create mode 100644 include/linux/io_uring.h (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h new file mode 100644 index 000000000000..c09135a1ef13 --- /dev/null +++ b/include/linux/io_uring.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IO_URING_H +#define _LINUX_IO_URING_H + +#include +#include +#include + +struct io_uring_task { + /* submission side */ + struct xarray xa; + struct wait_queue_head wait; + struct file *last; + atomic_long_t req_issue; + + /* completion side */ + bool in_idle ____cacheline_aligned_in_smp; + atomic_long_t req_complete; +}; + +#if defined(CONFIG_IO_URING) +void __io_uring_task_cancel(void); +void __io_uring_files_cancel(struct files_struct *files); +void __io_uring_free(struct task_struct *tsk); + +static inline void io_uring_task_cancel(void) +{ + if (current->io_uring && !xa_empty(¤t->io_uring->xa)) + __io_uring_task_cancel(); +} +static inline void io_uring_files_cancel(struct files_struct *files) +{ + if (current->io_uring && !xa_empty(¤t->io_uring->xa)) + __io_uring_files_cancel(files); +} +static inline void io_uring_free(struct task_struct *tsk) +{ + if (tsk->io_uring) + __io_uring_free(tsk); +} +#else +static inline void io_uring_task_cancel(void) +{ +} +static inline void io_uring_files_cancel(struct files_struct *files) +{ +} +static inline void io_uring_free(struct task_struct *tsk) +{ +} +#endif + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index afe01e232935..8bf2295ebee4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -63,6 +63,7 @@ struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; +struct io_uring_task; /* * Task state bitmask. NOTE! These bits are also @@ -935,6 +936,10 @@ struct task_struct { /* Open file information: */ struct files_struct *files; +#ifdef CONFIG_IO_URING + struct io_uring_task *io_uring; +#endif + /* Namespaces: */ struct nsproxy *nsproxy; -- cgit v1.2.3 From a3ec60054082ca2c2145ba487f9fc4de904e2b03 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 18 Sep 2020 20:41:00 -0600 Subject: io_uring: move io_uring_get_socket() into io_uring.h Now we have a io_uring kernel header, move this definition out of fs.h and into io_uring.h where it belongs. Signed-off-by: Jens Axboe --- include/linux/fs.h | 9 --------- include/linux/io_uring.h | 5 +++++ 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..1c4068428461 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3514,15 +3514,6 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); -#if defined(CONFIG_IO_URING) -extern struct sock *io_uring_get_socket(struct file *file); -#else -static inline struct sock *io_uring_get_socket(struct file *file) -{ - return NULL; -} -#endif - int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, unsigned int flags); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index c09135a1ef13..96315cfaf6d1 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -19,6 +19,7 @@ struct io_uring_task { }; #if defined(CONFIG_IO_URING) +struct sock *io_uring_get_socket(struct file *file); void __io_uring_task_cancel(void); void __io_uring_files_cancel(struct files_struct *files); void __io_uring_free(struct task_struct *tsk); @@ -39,6 +40,10 @@ static inline void io_uring_free(struct task_struct *tsk) __io_uring_free(tsk); } #else +static inline struct sock *io_uring_get_socket(struct file *file) +{ + return NULL; +} static inline void io_uring_task_cancel(void) { } -- cgit v1.2.3 From ce71bfea207b4d7c21d36f24ec37618ffcea1da8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 31 Aug 2020 12:08:10 -0600 Subject: fs: align IOCB_* flags with RWF_* flags We have a set of flags that are shared between the two and inherired in kiocb_set_rw_flags(), but we check and set these individually. Reorder the IOCB flags so that the bottom part of the space is synced with the RWF flag space, and then we can do them all in one mask and set operation. The only exception is RWF_SYNC, which needs to mark IOCB_SYNC and IOCB_DSYNC. Do that one separately. This shaves 15 bytes of text from kiocb_set_rw_flags() for me. Suggested-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe --- include/linux/fs.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c4068428461..4ebc14dee421 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -310,17 +310,20 @@ enum rw_hint { WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, }; -#define IOCB_EVENTFD (1 << 0) -#define IOCB_APPEND (1 << 1) -#define IOCB_DIRECT (1 << 2) -#define IOCB_HIPRI (1 << 3) -#define IOCB_DSYNC (1 << 4) -#define IOCB_SYNC (1 << 5) -#define IOCB_WRITE (1 << 6) -#define IOCB_NOWAIT (1 << 7) +/* Match RWF_* bits to IOCB bits */ +#define IOCB_HIPRI (__force int) RWF_HIPRI +#define IOCB_DSYNC (__force int) RWF_DSYNC +#define IOCB_SYNC (__force int) RWF_SYNC +#define IOCB_NOWAIT (__force int) RWF_NOWAIT +#define IOCB_APPEND (__force int) RWF_APPEND + +/* non-RWF related bits - start at 16 */ +#define IOCB_EVENTFD (1 << 16) +#define IOCB_DIRECT (1 << 17) +#define IOCB_WRITE (1 << 18) /* iocb->ki_waitq is valid */ -#define IOCB_WAITQ (1 << 8) -#define IOCB_NOIO (1 << 9) +#define IOCB_WAITQ (1 << 19) +#define IOCB_NOIO (1 << 20) struct kiocb { struct file *ki_filp; @@ -3317,6 +3320,9 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { int kiocb_flags = 0; + /* make sure there's no overlap between RWF and private IOCB flags */ + BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD); + if (!flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) @@ -3325,16 +3331,11 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOWAIT | IOCB_NOIO; + kiocb_flags |= IOCB_NOIO; } - if (flags & RWF_HIPRI) - kiocb_flags |= IOCB_HIPRI; - if (flags & RWF_DSYNC) - kiocb_flags |= IOCB_DSYNC; + kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) - kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC); - if (flags & RWF_APPEND) - kiocb_flags |= IOCB_APPEND; + kiocb_flags |= IOCB_DSYNC; ki->ki_flags |= kiocb_flags; return 0; -- cgit v1.2.3 From 7cd7becdddb00620fb8deb74e6fe4e5a1522ae5a Mon Sep 17 00:00:00 2001 From: sunils Date: Fri, 11 Sep 2020 00:13:26 +0300 Subject: net/mlx5: E-switch, Use PF num in metadata reg c0 Currently only 256 vports can be supported as only 8 bits are reserved for them and 8 bits are reserved for vhca_ids in metadata reg c0. To support more than 256 vports, replace vhca_id with a unique shorter 4-bit PF number which covers upto 16 PF's. Use remaining 12 bits for vports ranging 1-4095. This will continue to generate unique metadata even if multiple PCI devices have same switch_id. Signed-off-by: sunils Reviewed-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index c16827eeba9c..b0ae8020f13e 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -74,15 +74,16 @@ bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); /* Reg C0 usage: - * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT BITS(8) | ESW_CHAIN_TAG(16) > + * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_CHAIN_TAG(16) > * - * Highest 8 bits of the reg c0 is the vhca_id, next 8 bits is vport_num, - * the rest (lowest 16 bits) is left for tc chain tag restoration. - * VHCA_ID + VPORT comprise the SOURCE_PORT matching. + * Highest 4 bits of the reg c0 is the PF_NUM (range 0-15), 12 bits of + * unique non-zero vport id (range 1-4095). The rest (lowest 16 bits) is left + * for tc chain tag restoration. + * PFNUM + VPORT comprise the SOURCE_PORT matching. */ -#define ESW_VHCA_ID_BITS 8 -#define ESW_VPORT_BITS 8 -#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) +#define ESW_VPORT_BITS 12 +#define ESW_PFNUM_BITS 4 +#define ESW_SOURCE_PORT_METADATA_BITS (ESW_PFNUM_BITS + ESW_VPORT_BITS) #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) #define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) #define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\ -- cgit v1.2.3 From 23cc3493b5e107b8deb697cf3157a07276b5eff7 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 25 Sep 2020 09:32:45 -0700 Subject: iommu/uapi: Rename uapi functions User APIs such as iommu_sva_unbind_gpasid() may also be used by the kernel. Since we introduced user pointer to the UAPI functions, in-kernel callers cannot share the same APIs. In-kernel callers are also trusted, there is no need to validate the data. We plan to have two flavors of the same API functions, one called through ioctls, carrying a user pointer and one called directly with valid IOMMU UAPI structs. To differentiate both, let's rename existing functions with an iommu_uapi_ prefix. Suggested-by: Alex Williamson Signed-off-by: Jacob Pan Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/1601051567-54787-5-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2ad26d8b4ab9..d18de2afa6fb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -424,13 +424,13 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - struct iommu_cache_invalidate_info *inv_info); -extern int iommu_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); +extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, + struct device *dev, + struct iommu_cache_invalidate_info *inv_info); +extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, + struct device *dev, struct iommu_gpasid_bind_data *data); +extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, @@ -1032,21 +1032,22 @@ static inline int iommu_sva_get_pasid(struct iommu_sva *handle) return IOMMU_PASID_INVALID; } -static inline int -iommu_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - struct iommu_cache_invalidate_info *inv_info) +static inline int iommu_uapi_cache_invalidate(struct iommu_domain *domain, + struct device *dev, + struct iommu_cache_invalidate_info *inv_info) { return -ENODEV; } -static inline int iommu_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data) + +static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, + struct device *dev, + struct iommu_gpasid_bind_data *data) { return -ENODEV; } -static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, int pasid) +static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, int pasid) { return -ENODEV; } -- cgit v1.2.3 From d90573812eea63c6bc8ab8a38f661b4c27c3cdc0 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 25 Sep 2020 09:32:46 -0700 Subject: iommu/uapi: Handle data and argsz filled by users IOMMU user APIs are responsible for processing user data. This patch changes the interface such that user pointers can be passed into IOMMU code directly. Separate kernel APIs without user pointers are introduced for in-kernel users of the UAPI functionality. IOMMU UAPI data has a user filled argsz field which indicates the data length of the structure. User data is not trusted, argsz must be validated based on the current kernel data size, mandatory data size, and feature flags. User data may also be extended, resulting in possible argsz increase. Backward compatibility is ensured based on size and flags (or the functional equivalent fields) checking. This patch adds sanity checks in the IOMMU layer. In addition to argsz, reserved/unused fields in padding, flags, and version are also checked. Details are documented in Documentation/userspace-api/iommu.rst Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan Reviewed-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/1601051567-54787-6-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d18de2afa6fb..82876f682367 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -426,11 +426,14 @@ extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, struct device *dev, - struct iommu_cache_invalidate_info *inv_info); + void __user *uinfo); + extern int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, struct iommu_gpasid_bind_data *data); + struct device *dev, void __user *udata); extern int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); + struct device *dev, void __user *udata); +extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, @@ -1032,22 +1035,29 @@ static inline int iommu_sva_get_pasid(struct iommu_sva *handle) return IOMMU_PASID_INVALID; } -static inline int iommu_uapi_cache_invalidate(struct iommu_domain *domain, - struct device *dev, - struct iommu_cache_invalidate_info *inv_info) +static inline int +iommu_uapi_cache_invalidate(struct iommu_domain *domain, + struct device *dev, + struct iommu_cache_invalidate_info *inv_info) { return -ENODEV; } static inline int iommu_uapi_sva_bind_gpasid(struct iommu_domain *domain, - struct device *dev, - struct iommu_gpasid_bind_data *data) + struct device *dev, void __user *udata) { return -ENODEV; } static inline int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, int pasid) + struct device *dev, void __user *udata) +{ + return -ENODEV; +} + +static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, + ioasid_t pasid) { return -ENODEV; } -- cgit v1.2.3 From 88451f2cd3cec2abc30debdf129422d2699d1eba Mon Sep 17 00:00:00 2001 From: Zqiang Date: Tue, 8 Sep 2020 14:27:09 +0800 Subject: debugobjects: Free per CPU pool after CPU unplug If a CPU is offlined the debug objects per CPU pool is not cleaned up. If the CPU is never onlined again then the objects in the pool are wasted. Add a CPU hotplug callback which is invoked after the CPU is dead to free the pool. [ tglx: Massaged changelog and added comment about remote access safety ] Signed-off-by: Zqiang Signed-off-by: Thomas Gleixner Cc: Waiman Long Link: https://lore.kernel.org/r/20200908062709.11441-1-qiang.zhang@windriver.com --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bf9181cef444..6f524bbf71a2 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -36,6 +36,7 @@ enum cpuhp_state { CPUHP_X86_MCE_DEAD, CPUHP_VIRT_NET_DEAD, CPUHP_SLUB_DEAD, + CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, CPUHP_MM_VMSTAT_DEAD, CPUHP_SOFTIRQ_DEAD, -- cgit v1.2.3 From 6d849653b00f2336efcd371fe85a7eb31bb094e9 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Sat, 26 Sep 2020 14:55:47 +0200 Subject: regulator: qcom_smd: Add PM660/PM660L regulator support The PM660 and PM660L are a very very common PMIC combo, found on boards using the SDM630, SDM636, SDM660 (and SDA variants) SoC. PM660 provides 6 SMPS and 19 LDOs (of which one is unaccesible), while PM660L provides 5 SMPS (of which S3 and S4 are combined), 10 LDOs and a Buck-or-Boost (BoB) regulator. The PM660L IC also provides other regulators that are very specialized (for example, for the display) and will be managed in the other appropriate drivers (for example, labibb). Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20200926125549.13191-6-kholk11@gmail.com Signed-off-by: Mark Brown --- include/linux/soc/qcom/smd-rpm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index da304ce8c8f7..f2645ec52520 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -19,6 +19,10 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_CLK_BUF_A 0x616B6C63 #define QCOM_SMD_RPM_LDOA 0x616f646c #define QCOM_SMD_RPM_LDOB 0x626F646C +#define QCOM_SMD_RPM_RWCX 0x78637772 +#define QCOM_SMD_RPM_RWMX 0x786d7772 +#define QCOM_SMD_RPM_RWLC 0x636c7772 +#define QCOM_SMD_RPM_RWLM 0x6d6c7772 #define QCOM_SMD_RPM_MEM_CLK 0x326b6c63 #define QCOM_SMD_RPM_MISC_CLK 0x306b6c63 #define QCOM_SMD_RPM_NCPA 0x6170636E -- cgit v1.2.3 From 41a7431dbaa37533c3b732cdea425a7b8f2d4162 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 19 Sep 2020 16:04:18 +0200 Subject: power: supply: bq27xxx: add support for TI bq34z100 Add support for new device: the TI bq34z100-G1, a Wide Range Fuel Gauge for Li-Ion, PbA, NiMH, and NiCd batteries. The device shares a lot with other models, although it has its own differences requiring new quirks. This patch was tested on a system equipped with NiMH batteries. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 987d9652aa4e..111a40d0d3d5 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -32,6 +32,7 @@ enum bq27xxx_chip { BQ27621, BQ27Z561, BQ28Z610, + BQ34Z100, }; struct bq27xxx_device_info; -- cgit v1.2.3 From a4947e84f23474803b62a2759b5808147e4e15f9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 13 Sep 2020 13:29:28 +0300 Subject: overflow: Include header file with SIZE_MAX declaration The various array_size functions use SIZE_MAX define, but missed limits.h causes to failure to compile code that needs overflow.h. In file included from drivers/infiniband/core/uverbs_std_types_device.c:6: ./include/linux/overflow.h: In function 'array_size': ./include/linux/overflow.h:258:10: error: 'SIZE_MAX' undeclared (first use in this function) 258 | return SIZE_MAX; | ^~~~~~~~ Fixes: 610b15c50e86 ("overflow.h: Add allocation size calculation helpers") Link: https://lore.kernel.org/r/20200913102928.134985-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/overflow.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 93fcef105061..ff3c48f0abc5 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -3,6 +3,7 @@ #define __LINUX_OVERFLOW_H #include +#include /* * In the fallback code below, we need to compute the minimum and -- cgit v1.2.3 From 3301c215a2bb94b5a0afcb444bbe9bf2a395a65d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 21 Aug 2020 10:55:44 +0800 Subject: USB: UDC: Expand device model API interface The routines used by the UDC core to interface with the kernel's device model, namely usb_add_gadget_udc(), usb_add_gadget_udc_release(), and usb_del_gadget_udc(), provide access to only a subset of the device model's full API. They include functionality equivalent to device_register() and device_unregister() for gadgets, but they omit device_initialize(), device_add(), device_del(), get_device(), and put_device(). This patch expands the UDC API by adding usb_initialize_gadget(), usb_add_gadget(), usb_del_gadget(), usb_get_gadget(), and usb_put_gadget() to fill in the gap. It rewrites the existing routines to call the new ones. CC: Anton Vasilyev CC: Evgeny Novikov CC: Benjamin Herrenschmidt Reviewed-by: Greg Kroah-Hartman Signed-off-by: Alan Stern Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 52ce1f6b8f83..e7351d64f11f 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -436,6 +436,7 @@ struct usb_gadget { }; #define work_to_gadget(w) (container_of((w), struct usb_gadget, work)) +/* Interface to the device model */ static inline void set_gadget_data(struct usb_gadget *gadget, void *data) { dev_set_drvdata(&gadget->dev, data); } static inline void *get_gadget_data(struct usb_gadget *gadget) @@ -444,6 +445,26 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev) { return container_of(dev, struct usb_gadget, dev); } +static inline struct usb_gadget *usb_get_gadget(struct usb_gadget *gadget) +{ + get_device(&gadget->dev); + return gadget; +} +static inline void usb_put_gadget(struct usb_gadget *gadget) +{ + put_device(&gadget->dev); +} +extern void usb_initialize_gadget(struct device *parent, + struct usb_gadget *gadget, void (*release)(struct device *dev)); +extern int usb_add_gadget(struct usb_gadget *gadget); +extern void usb_del_gadget(struct usb_gadget *gadget); + +/* Legacy device-model interface */ +extern int usb_add_gadget_udc_release(struct device *parent, + struct usb_gadget *gadget, void (*release)(struct device *dev)); +extern int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget); +extern void usb_del_gadget_udc(struct usb_gadget *gadget); +extern char *usb_get_gadget_udc_name(void); /* iterates the non-control endpoints; 'tmp' is a struct usb_ep pointer */ #define gadget_for_each_ep(tmp, gadget) \ @@ -735,12 +756,6 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver); */ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver); -extern int usb_add_gadget_udc_release(struct device *parent, - struct usb_gadget *gadget, void (*release)(struct device *dev)); -extern int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget); -extern void usb_del_gadget_udc(struct usb_gadget *gadget); -extern char *usb_get_gadget_udc_name(void); - /*-------------------------------------------------------------------------*/ /* utility to simplify dealing with string descriptors */ -- cgit v1.2.3 From 4d12a897fa1276737724393331d1002962f62940 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 29 Sep 2020 23:22:00 +0530 Subject: bus: mhi: fix doubled words and struct image_info kernel-doc Drop doubled word "table" in kernel-doc. Fix syntax for the kernel-doc notation for struct image_info. Note that the bhi_vec field is private and not part of the kernel-doc. Drop doubled word "device" in a comment. Cc: Manivannan Sadhasivam Cc: Hemant Kumar Reviewed-by: Manivannan Sadhasivam [mani: Added bus: prefix to the commit subject] Signed-off-by: Randy Dunlap Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index c4a940d98912..0779bc689b3e 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -85,13 +85,15 @@ enum mhi_ch_type { }; /** - * struct image_info - Firmware and RDDM table table - * @mhi_buf - Buffer for firmware and RDDM table - * @entries - # of entries in table + * struct image_info - Firmware and RDDM table + * @mhi_buf: Buffer for firmware and RDDM table + * @entries: # of entries in table */ struct image_info { struct mhi_buf *mhi_buf; + /* private: from internal.h */ struct bhi_vec_entry *bhi_vec; + /* public: */ u32 entries; }; @@ -593,7 +595,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl); /** * mhi_sync_power_up - Start MHI power up sequence and wait till the device - * device enters valid EE state + * enters valid EE state * @mhi_cntrl: MHI controller */ int mhi_sync_power_up(struct mhi_controller *mhi_cntrl); -- cgit v1.2.3 From 5aa93f0576b4e7be60ffcccb674470c59789fd1b Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Tue, 29 Sep 2020 23:22:05 +0530 Subject: bus: mhi: core: Use generic name field for an MHI device An MHI device is not necessarily associated with only channels as we can have one associated with the controller itself. Hence, the chan_name field within the mhi_device structure should instead be replaced with a generic name to accurately reflect any type of MHI device. Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-7-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 0779bc689b3e..bb337d163dda 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -438,10 +438,10 @@ struct mhi_controller { }; /** - * struct mhi_device - Structure representing a MHI device which binds - * to channels + * struct mhi_device - Structure representing an MHI device which binds + * to channels or is associated with controllers * @id: Pointer to MHI device ID struct - * @chan_name: Name of the channel to which the device binds + * @name: Name of the associated MHI device * @mhi_cntrl: Controller the device belongs to * @ul_chan: UL channel for the device * @dl_chan: DL channel for the device @@ -453,7 +453,7 @@ struct mhi_controller { */ struct mhi_device { const struct mhi_device_id *id; - const char *chan_name; + const char *name; struct mhi_controller *mhi_cntrl; struct mhi_chan *ul_chan; struct mhi_chan *dl_chan; -- cgit v1.2.3 From 601455dae00a853d1d8d6d768b3b79a7d59a5853 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Tue, 29 Sep 2020 23:22:07 +0530 Subject: bus: mhi: core: Introduce counters to track MHI device state transitions Use counters to track MHI device state transitions such as those to M0, M2, or M3 states. This can help in better debug, allowing the user to see the number of transitions to a certain MHI state when queried using debugfs entries or via other mechanisms. Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-9-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index bb337d163dda..c56b4447a4e9 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -328,6 +328,7 @@ struct mhi_controller_config { * @dev_state: MHI device state * @dev_wake: Device wakeup count * @pending_pkts: Pending packets for the controller + * @M0, M2, M3: Counters to track number of device MHI state changes * @transition_list: List of MHI state transitions * @transition_lock: Lock for protecting MHI state transition list * @wlock: Lock for protecting device wakeup @@ -407,6 +408,7 @@ struct mhi_controller { enum mhi_state dev_state; atomic_t dev_wake; atomic_t pending_pkts; + u32 M0, M2, M3; struct list_head transition_list; spinlock_t transition_lock; spinlock_t wlock; -- cgit v1.2.3 From 8e3729bfa7be159d1f94d0caab9c1beff8268672 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Tue, 29 Sep 2020 23:22:08 +0530 Subject: bus: mhi: core: Read and save device hardware information from BHI Device hardware specific information such as serial number and the OEM PK hash can be read using BHI and saved on host to identify the endpoint. Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-10-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index c56b4447a4e9..0c97f4bc5fae 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -16,6 +16,8 @@ #include #include +#define MHI_MAX_OEM_PK_HASH_SEGMENTS 16 + struct mhi_chan; struct mhi_event; struct mhi_ctxt; @@ -316,6 +318,8 @@ struct mhi_controller_config { * @device_number: MHI controller device number * @major_version: MHI controller major revision number * @minor_version: MHI controller minor revision number + * @serial_number: MHI controller serial number obtained from BHI + * @oem_pk_hash: MHI controller OEM PK Hash obtained from BHI * @mhi_event: MHI event ring configurations table * @mhi_cmd: MHI command ring configurations table * @mhi_ctxt: MHI device context, shared memory between host and device @@ -394,6 +398,8 @@ struct mhi_controller { u32 device_number; u32 major_version; u32 minor_version; + u32 serial_number; + u32 oem_pk_hash[MHI_MAX_OEM_PK_HASH_SEGMENTS]; struct mhi_event *mhi_event; struct mhi_cmd *mhi_cmd; -- cgit v1.2.3 From f42dfbe8f712127031e7b9bc938a1c33cec2ff57 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Tue, 29 Sep 2020 23:22:09 +0530 Subject: bus: mhi: core: Introduce APIs to allocate and free the MHI controller Client devices should use the APIs provided to allocate and free the MHI controller structure. This will help ensure that the structure is zero-initialized and there are no false positives with respect to reading any values such as the serial number or the OEM PK hash. Suggested-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-11-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 0c97f4bc5fae..6b987e8cc438 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -527,6 +527,18 @@ struct mhi_driver { #define to_mhi_driver(drv) container_of(drv, struct mhi_driver, driver) #define to_mhi_device(dev) container_of(dev, struct mhi_device, dev) +/** + * mhi_alloc_controller - Allocate the MHI Controller structure + * Allocate the mhi_controller structure using zero initialized memory + */ +struct mhi_controller *mhi_alloc_controller(void); + +/** + * mhi_free_controller - Free the MHI Controller structure + * Free the mhi_controller structure which was previously allocated + */ +void mhi_free_controller(struct mhi_controller *mhi_cntrl); + /** * mhi_register_controller - Register MHI controller * @mhi_cntrl: MHI controller to register -- cgit v1.2.3 From f38173a731cae380885e843dace21c8dc4198285 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 29 Sep 2020 23:22:10 +0530 Subject: bus: mhi: core: Add const qualifier to MHI config information MHI channel, event and controller config data needs to be treated read only information. Add const qualifier to make sure config information passed by MHI controller is not modified by MHI core driver. Suggested-by: Kalle Valo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Hemant Kumar Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-12-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 6b987e8cc438..b2c0214bfbd6 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -280,9 +280,9 @@ struct mhi_controller_config { u32 timeout_ms; u32 buf_len; u32 num_channels; - struct mhi_channel_config *ch_cfg; + const struct mhi_channel_config *ch_cfg; u32 num_events; - struct mhi_event_config *event_cfg; + const struct mhi_event_config *event_cfg; bool use_bounce_buf; bool m2_no_db; }; @@ -545,7 +545,7 @@ void mhi_free_controller(struct mhi_controller *mhi_cntrl); * @config: Configuration to use for the controller */ int mhi_register_controller(struct mhi_controller *mhi_cntrl, - struct mhi_controller_config *config); + const struct mhi_controller_config *config); /** * mhi_unregister_controller - Unregister MHI controller -- cgit v1.2.3 From e1427f32b85010ca0c38104955e234ca89d4cee5 Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Tue, 29 Sep 2020 23:22:11 +0530 Subject: bus: mhi: Remove include of rwlock_types.h rwlock.h should not be included directly. Instead linux/splinlock.h should be included. Including it directly will break the RT build. Also there is no point in including _types.h headers directly. There is no benefit in including the type without the accessor. Fixes: 0cbf260820fa7 ("bus: mhi: core: Add support for registering MHI controllers") Reviewed-by: Manivannan Sadhasivam Signed-off-by: Clark Williams Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-13-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index b2c0214bfbd6..008b8f6fa9ff 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -9,10 +9,9 @@ #include #include #include -#include #include #include -#include +#include #include #include -- cgit v1.2.3 From c7bd825e52b3d0b9ae519a006b80521d4587f864 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Tue, 29 Sep 2020 23:22:13 +0530 Subject: bus: mhi: core: Introduce debugfs entries for MHI Introduce debugfs entries to show state, register, channel, device, and event rings information. Allow the host to dump registers, issue device wake, and change the MHI timeout to help in debug. Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-15-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 008b8f6fa9ff..fb45a0ff9aa3 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -291,6 +291,7 @@ struct mhi_controller_config { * @cntrl_dev: Pointer to the struct device of physical bus acting as the MHI * controller (required) * @mhi_dev: MHI device instance for the controller + * @debugfs_dentry: MHI controller debugfs directory * @regs: Base address of MHI MMIO register space (required) * @bhi: Points to base of MHI BHI register space * @bhie: Points to base of MHI BHIe register space @@ -370,6 +371,7 @@ struct mhi_controller_config { struct mhi_controller { struct device *cntrl_dev; struct mhi_device *mhi_dev; + struct dentry *debugfs_dentry; void __iomem *regs; void __iomem *bhi; void __iomem *bhie; -- cgit v1.2.3 From 9654ab011e28ee756d231035c488509dc214fa6f Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 29 Sep 2020 23:22:16 +0530 Subject: bus: mhi: Remove unused nr_irqs_req variable nr_irqs_req is unused in MHI stack. Reviewed-by: Manivannan Sadhasivam Signed-off-by: Loic Poulain Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200929175218.8178-18-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index fb45a0ff9aa3..d4841e5a5f45 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -312,7 +312,6 @@ struct mhi_controller_config { * @total_ev_rings: Total # of event rings allocated * @hw_ev_rings: Number of hardware event rings * @sw_ev_rings: Number of software event rings - * @nr_irqs_req: Number of IRQs required to operate (optional) * @nr_irqs: Number of IRQ allocated by bus master (required) * @family_number: MHI controller family number * @device_number: MHI controller device number @@ -393,7 +392,6 @@ struct mhi_controller { u32 total_ev_rings; u32 hw_ev_rings; u32 sw_ev_rings; - u32 nr_irqs_req; u32 nr_irqs; u32 family_number; u32 device_number; -- cgit v1.2.3 From 2efc459d06f1630001e3984854848a5647086232 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 16 Sep 2020 13:40:38 -0700 Subject: sysfs: Add sysfs_emit and sysfs_emit_at to format sysfs output Output defects can exist in sysfs content using sprintf and snprintf. sprintf does not know the PAGE_SIZE maximum of the temporary buffer used for outputting sysfs content and it's possible to overrun the PAGE_SIZE buffer length. Add a generic sysfs_emit function that knows that the size of the temporary buffer and ensures that no overrun is done. Add a generic sysfs_emit_at function that can be used in multiple call situations that also ensures that no overrun is done. Validate the output buffer argument to be page aligned. Validate the offset len argument to be within the PAGE_SIZE buf. Signed-off-by: Joe Perches Link: https://lore.kernel.org/r/884235202216d464d61ee975f7465332c86f76b2.1600285923.git.joe@perches.com Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 34e84122f635..2caa34c1ca1a 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -329,6 +329,10 @@ int sysfs_groups_change_owner(struct kobject *kobj, int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *groups, kuid_t kuid, kgid_t kgid); +__printf(2, 3) +int sysfs_emit(char *buf, const char *fmt, ...); +__printf(3, 4) +int sysfs_emit_at(char *buf, int at, const char *fmt, ...); #else /* CONFIG_SYSFS */ @@ -576,6 +580,17 @@ static inline int sysfs_group_change_owner(struct kobject *kobj, return 0; } +__printf(2, 3) +static inline int sysfs_emit(char *buf, const char *fmt, ...) +{ + return 0; +} + +__printf(3, 4) +static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) +{ + return 0; +} #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 7981593bf083801035b1f1377661849805acb216 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 16 Sep 2020 13:40:43 -0700 Subject: mm: and drivers core: Convert hugetlb_report_node_meminfo to sysfs_emit Convert the unbound sprintf in hugetlb_report_node_meminfo to use sysfs_emit_at so that no possible overrun of a PAGE_SIZE buf can occur. Signed-off-by: Joe Perches Acked-by: Mike Kravetz Link: https://lore.kernel.org/r/894b351b82da6013cde7f36ff4b5493cd0ec30d0.1600285923.git.joe@perches.com Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d5cc5f802dd4..ebca2ef02212 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -129,7 +129,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page); void hugetlb_report_meminfo(struct seq_file *); -int hugetlb_report_node_meminfo(int, char *); +int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, @@ -245,7 +245,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) { } -static inline int hugetlb_report_node_meminfo(int nid, char *buf) +static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) { return 0; } -- cgit v1.2.3 From 12f3467b0d28369d3add7a0deb65fdac9b503c90 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Thu, 24 Sep 2020 11:00:45 +0200 Subject: usb: typec: add typec_find_pwr_opmode This patch adds a function that converts power operation mode string into power operation mode value. It is useful to configure power operation mode through device tree property, as power capabilities may be linked to hardware design. Acked-by: Heikki Krogerus Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20200924090049.9041-3-amelie.delaunay@st.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 9cb1bec94b71..6be558045942 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -268,6 +268,7 @@ int typec_set_mode(struct typec_port *port, int mode); void *typec_get_drvdata(struct typec_port *port); +int typec_find_pwr_opmode(const char *name); int typec_find_orientation(const char *name); int typec_find_port_power_role(const char *name); int typec_find_power_role(const char *name); -- cgit v1.2.3 From 1f86a00c1159fd77e66b1bd6ff1a183f4d46f34d Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 29 Sep 2020 11:54:31 +0300 Subject: bus/fsl-mc: add support for 'driver_override' in the mc-bus This patch is required for vfio-fsl-mc meta driver to successfully bind layerscape container devices for device passthrough. This patch adds a mechanism to allow a layerscape device to specify a driver rather than a layerscape driver provide a device match. Example to allow a device (dprc.1) to specifically bind with driver (vfio-fsl-mc):- - echo vfio-fsl-mc > /sys/bus/fsl-mc/devices/dprc.1/driver_override - echo dprc.1 > /sys/bus/fsl-mc/drivers/fsl_mc_dprc/unbind - echo dprc.1 > /sys/bus/fsl-mc/drivers/vfio-fsl-mc/bind Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Bharat Bhushan Signed-off-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-4-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index a428c61ead6e..3b5f0c98636d 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -161,6 +161,7 @@ struct fsl_mc_obj_desc { * @regions: pointer to array of MMIO region entries * @irqs: pointer to array of pointers to interrupts allocated to this device * @resource: generic resource associated with this MC object device, if any. + * @driver_override: driver name to force a match * * Generic device object for MC object devices that are "attached" to a * MC bus. @@ -194,6 +195,7 @@ struct fsl_mc_device { struct fsl_mc_device_irq **irqs; struct fsl_mc_resource *resource; struct device_link *consumer_link; + char *driver_override; }; #define to_fsl_mc_device(_dev) \ -- cgit v1.2.3 From e0c171d5537f45185c2e8f97b9ab2e2fdea56fa9 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Tue, 29 Sep 2020 11:54:32 +0300 Subject: bus/fsl-mc: Set the QMAN/BMAN region flags The QMAN region is memory mapped, so it should be of type IORESOURCE_MEM. The region flags bits were wrongly used to pass additional information. Use the bus specific bits for this purpose. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-5-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 3b5f0c98636d..03a5d16dde73 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -3,6 +3,7 @@ * Freescale Management Complex (MC) bus public interface * * Copyright (C) 2014-2016 Freescale Semiconductor, Inc. + * Copyright 2019-2020 NXP * Author: German Rivera * */ @@ -148,6 +149,13 @@ struct fsl_mc_obj_desc { */ #define FSL_MC_IS_DPRC 0x0001 +/* Region flags */ +/* Indicates that region can be mapped as cacheable */ +#define FSL_MC_REGION_CACHEABLE 0x00000001 + +/* Indicates that region can be mapped as shareable */ +#define FSL_MC_REGION_SHAREABLE 0x00000002 + /** * struct fsl_mc_device - MC object device object * @dev: Linux driver model device object -- cgit v1.2.3 From 715b02ce8968bd560d295945520bae6d73707a73 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 29 Sep 2020 11:54:34 +0300 Subject: bus/fsl-mc: Add dprc-reset-container support DPRC reset is required by VFIO-mc in order to stop a device to further generate DMA transactions. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Bharat Bhushan Signed-off-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-7-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 03a5d16dde73..1d8800acf21f 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -524,6 +524,13 @@ static inline bool is_fsl_mc_bus_dpdmai(const struct fsl_mc_device *mc_dev) return mc_dev->dev.type == &fsl_mc_bus_dpdmai_type; } +#define DPRC_RESET_OPTION_NON_RECURSIVE 0x00000001 +int dprc_reset_container(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + int child_container_id, + u32 options); + /* * Data Path Buffer Pool (DPBP) API * Contains initialization APIs and runtime control APIs for DPBP -- cgit v1.2.3 From 5d781fabe64244ccf8a954617ea8b28737517b1c Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Tue, 29 Sep 2020 11:54:35 +0300 Subject: bus/fsl-mc: Export dprc_scan/dprc_remove functions to be used by multiple entities Currently the DPRC scan function is used only by the bus driver. But the same functionality will be needed by the VFIO driver. To support this, the dprc scan function was exported and a little bit adjusted to fit both scenarios. Also the scan mutex initialization is done when the bus object is created, not in dprc_probe in order to be used by both VFIO and bus driver. Similarily dprc_remove_devices is exported to be used by VFIO. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-8-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 1d8800acf21f..da11171bc38f 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -531,6 +531,13 @@ int dprc_reset_container(struct fsl_mc_io *mc_io, int child_container_id, u32 options); +int dprc_scan_container(struct fsl_mc_device *mc_bus_dev, + bool alloc_interrupts); + +void dprc_remove_devices(struct fsl_mc_device *mc_bus_dev, + struct fsl_mc_obj_desc *obj_desc_array, + int num_child_objects_in_mc); + /* * Data Path Buffer Pool (DPBP) API * Contains initialization APIs and runtime control APIs for DPBP -- cgit v1.2.3 From 46f35b5fbbbb22896839e5dbcc24d020926dca36 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Tue, 29 Sep 2020 11:54:36 +0300 Subject: bus/fsl-mc: Export a cleanup function for DPRC Create and export a cleanup function for DPRC. The function is used by the DPRC driver, but it will be used by the VFIO driver as well. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-9-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index da11171bc38f..5519a510b8c9 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -538,6 +538,8 @@ void dprc_remove_devices(struct fsl_mc_device *mc_bus_dev, struct fsl_mc_obj_desc *obj_desc_array, int num_child_objects_in_mc); +int dprc_cleanup(struct fsl_mc_device *mc_dev); + /* * Data Path Buffer Pool (DPBP) API * Contains initialization APIs and runtime control APIs for DPBP -- cgit v1.2.3 From 17eaf21320da965c2571a5f029a2becaffe8313f Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Tue, 29 Sep 2020 11:54:37 +0300 Subject: bus/fsl-mc: Add a container setup function Both DPRC driver and VFIO driver use the same initialization code for the DPRC. Introduced a new function which groups this initialization code. The function is exported and may be used by VFIO as well. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-10-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 5519a510b8c9..e99d181ee4cd 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -540,6 +540,8 @@ void dprc_remove_devices(struct fsl_mc_device *mc_bus_dev, int dprc_cleanup(struct fsl_mc_device *mc_dev); +int dprc_setup(struct fsl_mc_device *mc_dev); + /* * Data Path Buffer Pool (DPBP) API * Contains initialization APIs and runtime control APIs for DPBP -- cgit v1.2.3 From 0dadd95216d56cec01cef066eebe6354f069ff34 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Tue, 29 Sep 2020 11:54:39 +0300 Subject: bus/fsl-mc: Export IRQ pool handling functions to be used by VFIO The IRQ pool handling functions can be used by both DPRC driver and VFIO. Adapt and export those functions. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-12-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index e99d181ee4cd..f791fe38c251 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -542,6 +542,17 @@ int dprc_cleanup(struct fsl_mc_device *mc_dev); int dprc_setup(struct fsl_mc_device *mc_dev); +/** + * Maximum number of total IRQs that can be pre-allocated for an MC bus' + * IRQ pool + */ +#define FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS 256 + +int fsl_mc_populate_irq_pool(struct fsl_mc_device *mc_bus_dev, + unsigned int irq_count); + +void fsl_mc_cleanup_irq_pool(struct fsl_mc_device *mc_bus_dev); + /* * Data Path Buffer Pool (DPBP) API * Contains initialization APIs and runtime control APIs for DPBP -- cgit v1.2.3 From 273ee53ddf2e7d79565aff0ddc009ea2114d763b Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 29 Sep 2020 11:54:40 +0300 Subject: bus/fsl-mc: Extend ICID size from 16bit to 32bit In virtual machines the device-id range is defined between 0x10000-0x20000. The reason for using such a large range is to avoid overlapping with the PCI range. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Bharat Bhushan Signed-off-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-13-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index f791fe38c251..db244874e834 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -195,7 +195,7 @@ struct fsl_mc_device { struct device dev; u64 dma_mask; u16 flags; - u16 icid; + u32 icid; u16 mc_handle; struct fsl_mc_io *mc_io; struct fsl_mc_obj_desc obj_desc; -- cgit v1.2.3 From 894c26a1c274b8eafbb4b1dad67e70e51a106061 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 30 Sep 2020 22:05:42 +0800 Subject: ACPI: Support Generic Initiator only domains Generic Initiators are a new ACPI concept that allows for the description of proximity domains that contain a device which performs memory access (such as a network card) but neither host CPU nor Memory. This patch has the parsing code and provides the infrastructure for an architecture to associate these new domains with their nearest memory processing node. Signed-off-by: Jonathan Cameron Signed-off-by: Rafael J. Wysocki --- include/linux/nodemask.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 27e7fa36f707..3334ce056335 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -399,6 +399,7 @@ enum node_states { #endif N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ + N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ NR_NODE_STATES }; -- cgit v1.2.3 From 01aabca2fd545554905321029e6c4c5b2fedb345 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 30 Sep 2020 22:05:44 +0800 Subject: ACPI: Let ACPI know we support Generic Initiator Affinity Structures Until we tell ACPI that we support generic initiators, it will have to operate in fall back domain mode and all _PXM entries should be on existing non GI domains. This patch sets the relevant OSC bit to make that happen. Signed-off-by: Jonathan Cameron Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e9f6cd67943e..edbf3c4116b4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -545,6 +545,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_PCLPI_SUPPORT 0x00000080 #define OSC_SB_OSLPI_SUPPORT 0x00000100 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 +#define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; -- cgit v1.2.3 From 1e0cb59d5f0a41d1a2b7a1e7d45024fbac62d6e7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 30 Sep 2020 13:43:50 +0100 Subject: ACPI: Make acpi_evaluate_dsm() prototype consistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling a driver which includes both include/linux/acpi.h and include/acpi/acpi_bus.h for when CONFIG_ACPI=n for i386, I get this: /include/acpi/acpi_bus.h:53:20: error: conflicting types for ‘acpi_evaluate_dsm’ union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, ^~~~~~~~~~~~~~~~~ In file included from drivers/scsi/hisi_sas/hisi_sas.h:10:0, from drivers/scsi/hisi_sas/hisi_sas_main.c:7: ./include/linux/acpi.h:866:34: note: previous definition of ‘acpi_evaluate_dsm’ was here static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, ^~~~~~~~~~~~~~~~~ Fix by making prototype in include/linux/acpi.h consistent. Signed-off-by: John Garry [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7ce2235f99f9..49dbb105084f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -865,7 +865,7 @@ static inline bool acpi_driver_match_device(struct device *dev, static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, - int rev, int func, + u64 rev, u64 func, union acpi_object *argv4) { return NULL; -- cgit v1.2.3 From 49f618e1b669ef0e26a8d8d7f8fafc7b8fd31531 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 24 Sep 2020 13:04:47 +0200 Subject: PM: domains: Rename power state enums for genpd To clarify the code a bit, let's rename GPD_STATE_ACTIVE into GENPD_STATE_ON and GPD_STATE_POWER_OFF to GENPD_STATE_OFF. Signed-off-by: Ulf Hansson [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ee11502a575b..66f3c5d64d81 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -64,8 +64,8 @@ #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) enum gpd_status { - GPD_STATE_ACTIVE = 0, /* PM domain is active */ - GPD_STATE_POWER_OFF, /* PM domain is off */ + GENPD_STATE_ON = 0, /* PM domain is on */ + GENPD_STATE_OFF, /* PM domain is off */ }; struct dev_power_governor { -- cgit v1.2.3 From 69e0ad37c9f32d5aa1beb02aab4ec0cd055be013 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 28 Sep 2020 16:09:39 -0700 Subject: static_call: Fix return type of static_call_init Functions that are passed to early_initcall should be of type initcall_t, which expects a return type of int. This is not currently an error but a patch in the Clang LTO series could change that in the future. Fixes: 9183c3f9ed71 ("static_call: Add inline static call infrastructure") Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/lkml/20200903203053.3411268-17-samitolvanen@google.com/ --- include/linux/static_call.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/static_call.h b/include/linux/static_call.h index bfa2ba39be57..695da4c9b338 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -136,7 +136,7 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #ifdef CONFIG_HAVE_STATIC_CALL_INLINE -extern void __init static_call_init(void); +extern int __init static_call_init(void); struct static_call_mod { struct static_call_mod *next; @@ -187,7 +187,7 @@ extern int static_call_text_reserved(void *start, void *end); #elif defined(CONFIG_HAVE_STATIC_CALL) -static inline void static_call_init(void) { } +static inline int static_call_init(void) { return 0; } struct static_call_key { void *func; @@ -234,7 +234,7 @@ static inline int static_call_text_reserved(void *start, void *end) #else /* Generic implementation */ -static inline void static_call_init(void) { } +static inline int static_call_init(void) { return 0; } struct static_call_key { void *func; -- cgit v1.2.3 From 4976b718c3551faba2c0616ef55ebeb74db1c5ca Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:44 -0700 Subject: bpf: Introduce pseudo_btf_id Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a ksym so that further dereferences on the ksym can use the BTF info to validate accesses. Internally, when seeing a pseudo_btf_id ld insn, the verifier reads the btf_id stored in the insn[0]'s imm field and marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND, which is encoded in btf_vminux by pahole. If the VAR is not of a struct type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID and the mem_size is resolved to the size of the VAR's type. >From the VAR btf_id, the verifier can also read the address of the ksym's corresponding kernel var from kallsyms and use that to fill dst_reg. Therefore, the proper functionality of pseudo_btf_id depends on (1) kallsyms and (2) the encoding of kernel global VARs in pahole, which should be available since pahole v1.18. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-2-haoluo@google.com --- include/linux/bpf_verifier.h | 7 +++++++ include/linux/btf.h | 15 +++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 363b4f1c562a..e83ef6f6bf43 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -308,6 +308,13 @@ struct bpf_insn_aux_data { u32 map_index; /* index into used_maps[] */ u32 map_off; /* offset from value base address */ }; + struct { + enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ + union { + u32 btf_id; /* btf_id for struct typed var */ + u32 mem_size; /* mem_size for non-struct typed var */ + }; + } btf_var; }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 024e16ff7dcc..af1244180588 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -145,6 +145,21 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; } +static inline bool btf_type_is_var(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; +} + +/* union is only a special case of struct: + * all its offsetof(member) == 0 + */ +static inline bool btf_type_is_struct(const struct btf_type *t) +{ + u8 kind = BTF_INFO_KIND(t->info); + + return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +} + static inline u16 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); -- cgit v1.2.3 From eaa6bcb71ef6ed3dc18fc525ee7e293b06b4882b Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:47 -0700 Subject: bpf: Introduce bpf_per_cpu_ptr() Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel except that it may return NULL. This happens when the cpu parameter is out of range. So the caller must check the returned value. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-5-haoluo@google.com --- include/linux/bpf.h | 4 ++++ include/linux/btf.h | 11 +++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50e5c4b52bd1..9dde15b2479d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -293,6 +293,7 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ __BPF_ARG_TYPE_MAX, }; @@ -307,6 +308,7 @@ enum bpf_return_type { RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -405,6 +407,7 @@ enum bpf_reg_type { PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ }; /* The information passed from prog-specific *_is_valid_access @@ -1828,6 +1831,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; +extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/btf.h b/include/linux/btf.h index af1244180588..2bf641829664 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -110,6 +110,11 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, i < btf_type_vlen(struct_type); \ i++, member++) +#define for_each_vsi(i, datasec_type, member) \ + for (i = 0, member = btf_type_var_secinfo(datasec_type); \ + i < btf_type_vlen(datasec_type); \ + i++, member++) + static inline bool btf_type_is_ptr(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; @@ -194,6 +199,12 @@ static inline const struct btf_member *btf_type_member(const struct btf_type *t) return (const struct btf_member *)(t + 1); } +static inline const struct btf_var_secinfo *btf_type_var_secinfo( + const struct btf_type *t) +{ + return (const struct btf_var_secinfo *)(t + 1); +} + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); -- cgit v1.2.3 From 63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:48 -0700 Subject: bpf: Introducte bpf_this_cpu_ptr() Add bpf_this_cpu_ptr() to help access percpu var on this cpu. This helper always returns a valid pointer, therefore no need to check returned value for NULL. Also note that all programs run with preemption disabled, which means that the returned pointer is stable during all the execution of the program. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-6-haoluo@google.com --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9dde15b2479d..dc63eeed4fd9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -309,6 +309,7 @@ enum bpf_return_type { RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -1832,6 +1833,7 @@ extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From bfdc59701d6d100c99c3b987bcffd1c204e393c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:40 +0200 Subject: iov_iter: refactor rw_copy_check_uvector and import_iovec Split rw_copy_check_uvector into two new helpers with more sensible calling conventions: - iovec_from_user copies a iovec from userspace either into the provided stack buffer if it fits, or allocates a new buffer for it. Returns the actually used iovec. It also verifies that iov_len does fit a signed type, and handles compat iovecs if the compat flag is set. - __import_iovec consolidates the native and compat versions of import_iovec. It calls iovec_from_user, then validates each iovec actually points to user addresses, and ensures the total length doesn't overflow. This has two major implications: - the access_process_vm case loses the total lenght checking, which wasn't required anyway, given that each call receives two iovecs for the local and remote side of the operation, and it verifies the total length on the local side already. - instead of a single loop there now are two loops over the iovecs. Given that the iovecs are cache hot this doesn't make a major difference Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 16 +++++----------- include/linux/fs.h | 13 ------------- include/linux/uio.h | 12 +++++++++--- 3 files changed, 14 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 790be5ffc12c..cebcaac68aec 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -91,6 +91,11 @@ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* COMPAT_SYSCALL_DEFINEx */ +struct compat_iovec { + compat_uptr_t iov_base; + compat_size_t iov_len; +}; + #ifdef CONFIG_COMPAT #ifndef compat_user_stack_pointer @@ -248,11 +253,6 @@ typedef struct compat_siginfo { } _sifields; } compat_siginfo_t; -struct compat_iovec { - compat_uptr_t iov_base; - compat_size_t iov_len; -}; - struct compat_rlimit { compat_ulong_t rlim_cur; compat_ulong_t rlim_max; @@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, struct epoll_event; /* fortunately, this one is fixed-layout */ -extern ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, - unsigned long nr_segs, - unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer); - extern void __user *compat_alloc_user_space(unsigned long len); int compat_restore_altstack(const compat_stack_t __user *uss); diff --git a/include/linux/fs.h b/include/linux/fs.h index e019ea2f1347..b9fdac75ba06 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -178,14 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports async buffered reads */ #define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) -/* - * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector - * that indicates that they should check the contents of the iovec are - * valid, but not check the memory that the iovec elements - * points too. - */ -#define CHECK_IOVEC_ONLY -1 - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -1887,11 +1879,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } -ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, - unsigned long nr_segs, unsigned long fast_segs, - struct iovec *fast_pointer, - struct iovec **ret_pointer); - extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, diff --git a/include/linux/uio.h b/include/linux/uio.h index 3835a8a8e9ea..92c11fe41c62 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -266,9 +266,15 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i); -ssize_t import_iovec(int type, const struct iovec __user * uvector, - unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i); +struct iovec *iovec_from_user(const struct iovec __user *uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_iov, bool compat); +ssize_t import_iovec(int type, const struct iovec __user *uvec, + unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, + struct iov_iter *i); +ssize_t __import_iovec(int type, const struct iovec __user *uvec, + unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, + struct iov_iter *i, bool compat); #ifdef CONFIG_COMPAT struct compat_iovec; -- cgit v1.2.3 From 89cd35c58bc2e36bfdc23dde67a429b08cf4ae03 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:41 +0200 Subject: iov_iter: transparently handle compat iovecs in import_iovec Use in compat_syscall to import either native or the compat iovecs, and remove the now superflous compat_import_iovec. This removes the need for special compat logic in most callers, and the remaining ones can still be simplified by using __import_iovec with a bool compat parameter. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/uio.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 92c11fe41c62..daedc61ad370 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -275,14 +275,6 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec, ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat); - -#ifdef CONFIG_COMPAT -struct compat_iovec; -ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector, - unsigned nr_segs, unsigned fast_segs, - struct iovec **iov, struct iov_iter *i); -#endif - int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); -- cgit v1.2.3 From 3523a9d45478984169d7f4416a00c83afc964e2a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:42 +0200 Subject: fs: remove various compat readv/writev helpers Now that import_iovec handles compat iovecs as well, all the duplicated code in the compat readv/writev helpers is not needed. Remove them and switch the compat syscall handlers to use the native helpers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index cebcaac68aec..36b5842162c7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -546,25 +546,25 @@ asmlinkage long compat_sys_getdents(unsigned int fd, /* fs/read_write.c */ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd, - const struct compat_iovec __user *vec, compat_ulong_t vlen); + const struct iovec __user *vec, compat_ulong_t vlen); asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd, - const struct compat_iovec __user *vec, compat_ulong_t vlen); + const struct iovec __user *vec, compat_ulong_t vlen); /* No generic prototype for pread64 and pwrite64 */ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long compat_sys_preadv64(unsigned long fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, unsigned long vlen, loff_t pos); #endif #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 asmlinkage long compat_sys_pwritev64(unsigned long fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, unsigned long vlen, loff_t pos); #endif @@ -800,20 +800,20 @@ asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp, int flags); asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 asmlinkage long compat_sys_preadv64v2(unsigned long fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, unsigned long vlen, loff_t pos, rwf_t flags); #endif #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 asmlinkage long compat_sys_pwritev64v2(unsigned long fd, - const struct compat_iovec __user *vec, + const struct iovec __user *vec, unsigned long vlen, loff_t pos, rwf_t flags); #endif -- cgit v1.2.3 From 5f764d624a89d4d00d282157077878d4e7c69869 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:43 +0200 Subject: fs: remove the compat readv/writev syscalls Now that import_iovec handles compat iovecs, the native readv and writev syscalls can be used for the compat case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 36b5842162c7..07268fc8082b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -545,10 +545,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd, /* fs/read_write.c */ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); -asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd, - const struct iovec __user *vec, compat_ulong_t vlen); -asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd, - const struct iovec __user *vec, compat_ulong_t vlen); /* No generic prototype for pread64 and pwrite64 */ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, const struct iovec __user *vec, -- cgit v1.2.3 From 598b3cec831fd6ccb3cbe4919a722e868c6364a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:44 +0200 Subject: fs: remove compat_sys_vmsplice Now that import_iovec handles compat iovecs, the native vmsplice syscall can be used for the compat case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 07268fc8082b..7c3e876703cf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -597,10 +597,6 @@ asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize, int flags); -/* fs/splice.c */ -asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, - unsigned int nr_segs, unsigned int flags); - /* fs/stat.c */ asmlinkage long compat_sys_newfstatat(unsigned int dfd, const char __user *filename, -- cgit v1.2.3 From c3973b401ef2b0b8005f8074a10e96e3ea093823 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 06:51:45 +0200 Subject: mm: remove compat_process_vm_{readv,writev} Now that import_iovec handles compat iovecs, the native syscalls can be used for the compat case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 7c3e876703cf..3e3d2beafed3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -780,14 +780,6 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, int flags); asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); -asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, - const struct compat_iovec __user *lvec, - compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, - compat_ulong_t riovcnt, compat_ulong_t flags); -asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, - const struct compat_iovec __user *lvec, - compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, - compat_ulong_t riovcnt, compat_ulong_t flags); asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp, int flags); -- cgit v1.2.3 From 51cf18c90ca1b51d1cb4af3064e85fcf8610b5d2 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Fri, 28 Aug 2020 10:00:49 +0100 Subject: sched/debug: Add new tracepoint to track cpu_capacity rq->cpu_capacity is a key element in several scheduler parts, such as EAS task placement and load balancing. Tracking this value enables testing and/or debugging by a toolkit. Signed-off-by: Vincent Donnefort Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1598605249-72651-1-git-send-email-vincent.donnefort@arm.com --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2bf0af19a62a..f516c18eeb20 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2044,6 +2044,7 @@ const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq); const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq); int sched_trace_rq_cpu(struct rq *rq); +int sched_trace_rq_cpu_capacity(struct rq *rq); int sched_trace_rq_nr_running(struct rq *rq); const struct cpumask *sched_trace_rd_span(struct root_domain *rd); -- cgit v1.2.3 From 8e1b3884eed7d96d4f1210b668611ee6a1803ea5 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 1 Oct 2020 17:12:50 +0000 Subject: net: remove NETDEV_HW_ADDR_T_SLAVE NETDEV_HW_ADDR_T_SLAVE is not used anymore, remove it. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 28cfa53daf72..0c79d9e56a5e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -212,9 +212,8 @@ struct netdev_hw_addr { unsigned char type; #define NETDEV_HW_ADDR_T_LAN 1 #define NETDEV_HW_ADDR_T_SAN 2 -#define NETDEV_HW_ADDR_T_SLAVE 3 -#define NETDEV_HW_ADDR_T_UNICAST 4 -#define NETDEV_HW_ADDR_T_MULTICAST 5 +#define NETDEV_HW_ADDR_T_UNICAST 3 +#define NETDEV_HW_ADDR_T_MULTICAST 4 bool global_use; int sync_cnt; int refcount; -- cgit v1.2.3 From 19fbcb36a39eefbe8912a13ccc02e937b1c418d6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 Oct 2020 00:44:28 +0200 Subject: net/sched: act_vlan: Add {POP,PUSH}_ETH actions Implement TCA_VLAN_ACT_POP_ETH and TCA_VLAN_ACT_PUSH_ETH, to respectively pop and push a base Ethernet header at the beginning of a frame. POP_ETH is just a matter of pulling ETH_HLEN bytes. VLAN tags, if any, must be stripped before calling POP_ETH. PUSH_ETH is restricted to skbs with no mac_header, and only the MAC addresses can be configured. The Ethertype is automatically set from skb->protocol. These restrictions ensure that all skb's fields remain consistent, so that this action can't confuse other part of the networking stack (like GSO). Since openvswitch already had these actions, consolidate the code in skbuff.c (like for vlan and mpls push/pop). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3d0cf3722bb4..42131e325e27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3573,6 +3573,9 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); +int skb_eth_pop(struct sk_buff *skb); +int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, + const unsigned char *src); int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len, bool ethernet); int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, -- cgit v1.2.3 From 24ce66c04a06a678f156cf575128246f3d214b4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Sep 2020 07:58:18 +0200 Subject: uaccess: provide a generic TASK_SIZE_MAX definition Define TASK_SIZE_MAX as TASK_SIZE if not otherwise defined. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- include/linux/uaccess.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70073c802b48..d0e43761c708 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -31,6 +31,10 @@ typedef struct { /* empty dummy */ } mm_segment_t; +#ifndef TASK_SIZE_MAX +#define TASK_SIZE_MAX TASK_SIZE +#endif + #define uaccess_kernel() (false) #define user_addr_max() (TASK_SIZE_MAX) -- cgit v1.2.3 From 8438f5211479e4b8433f641634362264bc3bbd9e Mon Sep 17 00:00:00 2001 From: Tingwei Zhang Date: Mon, 5 Oct 2020 10:13:13 +0300 Subject: tracing: Add flag to control different traces More traces like event trace or trace marker will be supported. Add flag for difference traces, so that they can be controlled separately. Move current function trace to it's own flag instead of global ftrace enable flag. Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Alexander Shishkin Signed-off-by: Tingwei Zhang Signed-off-by: Alexander Shishkin Link: https://lore.kernel.org/r/20201005071319.78508-3-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/trace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index 36d255d66f88..c115a5d2269f 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -3,6 +3,9 @@ #define _LINUX_TRACE_H #ifdef CONFIG_TRACING + +#define TRACE_EXPORT_FUNCTION BIT(0) + /* * The trace export - an export of Ftrace output. The trace_export * can process traces and export them to a registered destination as @@ -15,10 +18,12 @@ * next - pointer to the next trace_export * write - copy traces which have been delt with ->commit() to * the destination + * flags - which ftrace to be exported */ struct trace_export { struct trace_export __rcu *next; void (*write)(struct trace_export *, const void *, unsigned int); + int flags; }; int register_ftrace_export(struct trace_export *export); -- cgit v1.2.3 From 8ab7a2b7055c88c3da5e4684dfa015c6a8987c28 Mon Sep 17 00:00:00 2001 From: Tingwei Zhang Date: Mon, 5 Oct 2020 10:13:14 +0300 Subject: tracing: Add trace_export support for event trace Only function traces can be exported to other destinations currently. This patch exports event trace as well. Move trace export related function to the beginning of file so other trace can call trace_process_export() to export. Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Alexander Shishkin Signed-off-by: Tingwei Zhang Signed-off-by: Alexander Shishkin Link: https://lore.kernel.org/r/20201005071319.78508-4-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index c115a5d2269f..86033d214972 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -5,6 +5,7 @@ #ifdef CONFIG_TRACING #define TRACE_EXPORT_FUNCTION BIT(0) +#define TRACE_EXPORT_EVENT BIT(1) /* * The trace export - an export of Ftrace output. The trace_export -- cgit v1.2.3 From 458999c6f67b0ffcc704a4892041dd700adf7d83 Mon Sep 17 00:00:00 2001 From: Tingwei Zhang Date: Mon, 5 Oct 2020 10:13:15 +0300 Subject: tracing: Add trace_export support for trace_marker Add the support to route trace_marker buffer to other destination via trace_export. Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Alexander Shishkin Signed-off-by: Tingwei Zhang Signed-off-by: Alexander Shishkin Link: https://lore.kernel.org/r/20201005071319.78508-5-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index 86033d214972..886a4ffd9d45 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -6,6 +6,7 @@ #define TRACE_EXPORT_FUNCTION BIT(0) #define TRACE_EXPORT_EVENT BIT(1) +#define TRACE_EXPORT_MARKER BIT(2) /* * The trace export - an export of Ftrace output. The trace_export -- cgit v1.2.3 From 07f8569fbe44f35e53c7a5946fbb348432c68377 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 4 Oct 2020 21:32:00 +0200 Subject: w1: Constify struct w1_family_ops The fops field in the w1_family struct is never modified. Make it const to indicate that. Constifying the pointer makes it possible for drivers to declare static w1_family_ops structs const, which in turn will allow the compiler to put it in read-only memory. Reviewed-by: Sebastian Reichel Signed-off-by: Rikard Falkeborn Link: https://lore.kernel.org/r/20201004193202.4044-2-rikard.falkeborn@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/w1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/w1.h b/include/linux/w1.h index cebf3464bc03..949d3b10e531 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -269,7 +269,7 @@ struct w1_family { struct list_head family_entry; u8 fid; - struct w1_family_ops *fops; + const struct w1_family_ops *fops; const struct of_device_id *of_match_table; -- cgit v1.2.3 From c307459b9d1fcb8bbf3ea5a4162979532322ef77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:13 -0700 Subject: fs/kernel_read_file: Remove FIRMWARE_PREALLOC_BUFFER enum FIRMWARE_PREALLOC_BUFFER is a "how", not a "what", and confuses the LSMs that are interested in filtering between types of things. The "how" should be an internal detail made uninteresting to the LSMs. Fixes: a098ecd2fa7d ("firmware: support loading into a pre-allocated buffer") Fixes: fd90bc559bfb ("ima: based on policy verify firmware signatures (pre-allocated buffer)") Fixes: 4f0496d8ffa3 ("ima: based on policy warn about loading firmware (pre-allocated buffer)") Signed-off-by: Kees Cook Reviewed-by: Mimi Zohar Reviewed-by: Luis Chamberlain Acked-by: Scott Branden Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201002173828.2099543-2-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..7336e22d0c5d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2858,10 +2858,10 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); +/* This is a list of *what* is being read, not *how*. */ #define __kernel_read_file_id(id) \ id(UNKNOWN, unknown) \ id(FIRMWARE, firmware) \ - id(FIRMWARE_PREALLOC_BUFFER, firmware) \ id(FIRMWARE_EFI_EMBEDDED, firmware) \ id(MODULE, kernel-module) \ id(KEXEC_IMAGE, kexec-image) \ -- cgit v1.2.3 From 06e67b849ab910a49a629445f43edb074153d0eb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:14 -0700 Subject: fs/kernel_read_file: Remove FIRMWARE_EFI_EMBEDDED enum The "FIRMWARE_EFI_EMBEDDED" enum is a "where", not a "what". It should not be distinguished separately from just "FIRMWARE", as this confuses the LSMs about what is being loaded. Additionally, there was no actual validation of the firmware contents happening. Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firmware_request_platform()") Signed-off-by: Kees Cook Reviewed-by: Luis Chamberlain Acked-by: Scott Branden Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201002173828.2099543-3-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7336e22d0c5d..3fb7af12d033 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2858,11 +2858,10 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); -/* This is a list of *what* is being read, not *how*. */ +/* This is a list of *what* is being read, not *how* nor *where*. */ #define __kernel_read_file_id(id) \ id(UNKNOWN, unknown) \ id(FIRMWARE, firmware) \ - id(FIRMWARE_EFI_EMBEDDED, firmware) \ id(MODULE, kernel-module) \ id(KEXEC_IMAGE, kexec-image) \ id(KEXEC_INITRAMFS, kexec-initramfs) \ -- cgit v1.2.3 From b89999d004931ab2e5123611ace7dab77328f8d6 Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Fri, 2 Oct 2020 10:38:15 -0700 Subject: fs/kernel_read_file: Split into separate include file Move kernel_read_file* out of linux/fs.h to its own linux/kernel_read_file.h include file. That header gets pulled in just about everywhere and doesn't really need functions not related to the general fs interface. Suggested-by: Christoph Hellwig Signed-off-by: Scott Branden Signed-off-by: Kees Cook Reviewed-by: Christoph Hellwig Reviewed-by: Mimi Zohar Reviewed-by: Luis Chamberlain Acked-by: Greg Kroah-Hartman Acked-by: James Morris Link: https://lore.kernel.org/r/20200706232309.12010-2-scott.branden@broadcom.com Link: https://lore.kernel.org/r/20201002173828.2099543-4-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 38 ------------------------------ include/linux/ima.h | 1 + include/linux/kernel_read_file.h | 51 ++++++++++++++++++++++++++++++++++++++++ include/linux/security.h | 1 + 4 files changed, 53 insertions(+), 38 deletions(-) create mode 100644 include/linux/kernel_read_file.h (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3fb7af12d033..0885d53afb11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2858,44 +2858,6 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); -/* This is a list of *what* is being read, not *how* nor *where*. */ -#define __kernel_read_file_id(id) \ - id(UNKNOWN, unknown) \ - id(FIRMWARE, firmware) \ - id(MODULE, kernel-module) \ - id(KEXEC_IMAGE, kexec-image) \ - id(KEXEC_INITRAMFS, kexec-initramfs) \ - id(POLICY, security-policy) \ - id(X509_CERTIFICATE, x509-certificate) \ - id(MAX_ID, ) - -#define __fid_enumify(ENUM, dummy) READING_ ## ENUM, -#define __fid_stringify(dummy, str) #str, - -enum kernel_read_file_id { - __kernel_read_file_id(__fid_enumify) -}; - -static const char * const kernel_read_file_str[] = { - __kernel_read_file_id(__fid_stringify) -}; - -static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) -{ - if ((unsigned)id >= READING_MAX_ID) - return kernel_read_file_str[READING_UNKNOWN]; - - return kernel_read_file_str[id]; -} - -extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, - enum kernel_read_file_id); -extern int kernel_read_file_from_path(const char *, void **, loff_t *, loff_t, - enum kernel_read_file_id); -extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, loff_t, - enum kernel_read_file_id); -extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, - enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); diff --git a/include/linux/ima.h b/include/linux/ima.h index d15100de6cdd..64804f78408b 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -7,6 +7,7 @@ #ifndef _LINUX_IMA_H #define _LINUX_IMA_H +#include #include #include #include diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h new file mode 100644 index 000000000000..78cf3d7dc835 --- /dev/null +++ b/include/linux/kernel_read_file.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KERNEL_READ_FILE_H +#define _LINUX_KERNEL_READ_FILE_H + +#include +#include + +/* This is a list of *what* is being read, not *how* nor *where*. */ +#define __kernel_read_file_id(id) \ + id(UNKNOWN, unknown) \ + id(FIRMWARE, firmware) \ + id(MODULE, kernel-module) \ + id(KEXEC_IMAGE, kexec-image) \ + id(KEXEC_INITRAMFS, kexec-initramfs) \ + id(POLICY, security-policy) \ + id(X509_CERTIFICATE, x509-certificate) \ + id(MAX_ID, ) + +#define __fid_enumify(ENUM, dummy) READING_ ## ENUM, +#define __fid_stringify(dummy, str) #str, + +enum kernel_read_file_id { + __kernel_read_file_id(__fid_enumify) +}; + +static const char * const kernel_read_file_str[] = { + __kernel_read_file_id(__fid_stringify) +}; + +static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) +{ + if ((unsigned int)id >= READING_MAX_ID) + return kernel_read_file_str[READING_UNKNOWN]; + + return kernel_read_file_str[id]; +} + +int kernel_read_file(struct file *file, + void **buf, loff_t *size, loff_t max_size, + enum kernel_read_file_id id); +int kernel_read_file_from_path(const char *path, + void **buf, loff_t *size, loff_t max_size, + enum kernel_read_file_id id); +int kernel_read_file_from_path_initns(const char *path, + void **buf, loff_t *size, loff_t max_size, + enum kernel_read_file_id id); +int kernel_read_file_from_fd(int fd, + void **buf, loff_t *size, loff_t max_size, + enum kernel_read_file_id id); + +#endif /* _LINUX_KERNEL_READ_FILE_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 0a0a03b36a3b..42df0d9b4c37 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -23,6 +23,7 @@ #ifndef __LINUX_SECURITY_H #define __LINUX_SECURITY_H +#include #include #include #include -- cgit v1.2.3 From f7a4f689bca6072492626938aad6dd2f32c5bf97 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:17 -0700 Subject: fs/kernel_read_file: Remove redundant size argument In preparation for refactoring kernel_read_file*(), remove the redundant "size" argument which is not needed: it can be included in the return code, with callers adjusted. (VFS reads already cannot be larger than INT_MAX.) Signed-off-by: Kees Cook Reviewed-by: Mimi Zohar Reviewed-by: Luis Chamberlain Reviewed-by: James Morris Acked-by: Scott Branden Link: https://lore.kernel.org/r/20201002173828.2099543-6-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel_read_file.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h index 78cf3d7dc835..0ca0bdbed1bd 100644 --- a/include/linux/kernel_read_file.h +++ b/include/linux/kernel_read_file.h @@ -36,16 +36,16 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) } int kernel_read_file(struct file *file, - void **buf, loff_t *size, loff_t max_size, + void **buf, loff_t max_size, enum kernel_read_file_id id); int kernel_read_file_from_path(const char *path, - void **buf, loff_t *size, loff_t max_size, + void **buf, loff_t max_size, enum kernel_read_file_id id); int kernel_read_file_from_path_initns(const char *path, - void **buf, loff_t *size, loff_t max_size, + void **buf, loff_t max_size, enum kernel_read_file_id id); int kernel_read_file_from_fd(int fd, - void **buf, loff_t *size, loff_t max_size, + void **buf, loff_t max_size, enum kernel_read_file_id id); #endif /* _LINUX_KERNEL_READ_FILE_H */ -- cgit v1.2.3 From 113eeb517780add2b38932a61d4e4440a73eb72a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:18 -0700 Subject: fs/kernel_read_file: Switch buffer size arg to size_t In preparation for further refactoring of kernel_read_file*(), rename the "max_size" argument to the more accurate "buf_size", and correct its type to size_t. Add kerndoc to explain the specifics of how the arguments will be used. Note that with buf_size now size_t, it can no longer be negative (and was never called with a negative value). Adjust callers to use it as a "maximum size" when *buf is NULL. Signed-off-by: Kees Cook Reviewed-by: Mimi Zohar Reviewed-by: Luis Chamberlain Reviewed-by: James Morris Acked-by: Scott Branden Link: https://lore.kernel.org/r/20201002173828.2099543-7-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel_read_file.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h index 0ca0bdbed1bd..910039e7593e 100644 --- a/include/linux/kernel_read_file.h +++ b/include/linux/kernel_read_file.h @@ -36,16 +36,16 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) } int kernel_read_file(struct file *file, - void **buf, loff_t max_size, + void **buf, size_t buf_size, enum kernel_read_file_id id); int kernel_read_file_from_path(const char *path, - void **buf, loff_t max_size, + void **buf, size_t buf_size, enum kernel_read_file_id id); int kernel_read_file_from_path_initns(const char *path, - void **buf, loff_t max_size, + void **buf, size_t buf_size, enum kernel_read_file_id id); int kernel_read_file_from_fd(int fd, - void **buf, loff_t max_size, + void **buf, size_t buf_size, enum kernel_read_file_id id); #endif /* _LINUX_KERNEL_READ_FILE_H */ -- cgit v1.2.3 From 885352881f11f1f3113d8eb877786bcb6d720544 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:19 -0700 Subject: fs/kernel_read_file: Add file_size output argument In preparation for adding partial read support, add an optional output argument to kernel_read_file*() that reports the file size so callers can reason more easily about their reading progress. Signed-off-by: Kees Cook Reviewed-by: Mimi Zohar Reviewed-by: Luis Chamberlain Reviewed-by: James Morris Acked-by: Scott Branden Link: https://lore.kernel.org/r/20201002173828.2099543-8-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel_read_file.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h index 910039e7593e..023293eaf948 100644 --- a/include/linux/kernel_read_file.h +++ b/include/linux/kernel_read_file.h @@ -37,15 +37,19 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) int kernel_read_file(struct file *file, void **buf, size_t buf_size, + size_t *file_size, enum kernel_read_file_id id); int kernel_read_file_from_path(const char *path, void **buf, size_t buf_size, + size_t *file_size, enum kernel_read_file_id id); int kernel_read_file_from_path_initns(const char *path, void **buf, size_t buf_size, + size_t *file_size, enum kernel_read_file_id id); int kernel_read_file_from_fd(int fd, void **buf, size_t buf_size, + size_t *file_size, enum kernel_read_file_id id); #endif /* _LINUX_KERNEL_READ_FILE_H */ -- cgit v1.2.3 From b64fcae74b6d6940d14243c963ab0089e8f0d82d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:20 -0700 Subject: LSM: Introduce kernel_post_load_data() hook There are a few places in the kernel where LSMs would like to have visibility into the contents of a kernel buffer that has been loaded or read. While security_kernel_post_read_file() (which includes the buffer) exists as a pairing for security_kernel_read_file(), no such hook exists to pair with security_kernel_load_data(). Earlier proposals for just using security_kernel_post_read_file() with a NULL file argument were rejected (i.e. "file" should always be valid for the security_..._file hooks, but it appears at least one case was left in the kernel during earlier refactoring. (This will be fixed in a subsequent patch.) Since not all cases of security_kernel_load_data() can have a single contiguous buffer made available to the LSM hook (e.g. kexec image segments are separately loaded), there needs to be a way for the LSM to reason about its expectations of the hook coverage. In order to handle this, add a "contents" argument to the "kernel_load_data" hook that indicates if the newly added "kernel_post_load_data" hook will be called with the full contents once loaded. That way, LSMs requiring full contents can choose to unilaterally reject "kernel_load_data" with contents=false (which is effectively the existing hook coverage), but when contents=true they can allow it and later evaluate the "kernel_post_load_data" hook once the buffer is loaded. With this change, LSMs can gain coverage over non-file-backed data loads (e.g. init_module(2) and firmware userspace helper), which will happen in subsequent patches. Additionally prepare IMA to start processing these cases. Signed-off-by: Kees Cook Reviewed-by: KP Singh Link: https://lore.kernel.org/r/20201002173828.2099543-9-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/ima.h | 13 +++++++++++-- include/linux/lsm_hook_defs.h | 4 +++- include/linux/lsm_hooks.h | 10 ++++++++++ include/linux/security.h | 14 ++++++++++++-- 4 files changed, 36 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 64804f78408b..af9fb8c5f16a 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -20,7 +20,9 @@ extern void ima_post_create_tmpfile(struct inode *inode); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); -extern int ima_load_data(enum kernel_load_data_id id); +extern int ima_load_data(enum kernel_load_data_id id, bool contents); +extern int ima_post_load_data(char *buf, loff_t size, + enum kernel_load_data_id id, char *description); extern int ima_read_file(struct file *file, enum kernel_read_file_id id); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); @@ -78,7 +80,14 @@ static inline int ima_file_mprotect(struct vm_area_struct *vma, return 0; } -static inline int ima_load_data(enum kernel_load_data_id id) +static inline int ima_load_data(enum kernel_load_data_id id, bool contents) +{ + return 0; +} + +static inline int ima_post_load_data(char *buf, loff_t size, + enum kernel_load_data_id id, + char *description) { return 0; } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 2a8c74d99015..83c6f1f5cc1e 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -184,7 +184,9 @@ LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid) LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid) LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) -LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id id) +LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id id, bool contents) +LSM_HOOK(int, 0, kernel_post_load_data, char *buf, loff_t size, + enum kernel_read_file_id id, char *description) LSM_HOOK(int, 0, kernel_read_file, struct file *file, enum kernel_read_file_id id) LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf, diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9e2e3e63719d..6bb4f1a0158c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -635,7 +635,17 @@ * @kernel_load_data: * Load data provided by userspace. * @id kernel load data identifier + * @contents if a subsequent @kernel_post_load_data will be called. * Return 0 if permission is granted. + * @kernel_post_load_data: + * Load data provided by a non-file source (usually userspace buffer). + * @buf pointer to buffer containing the data contents. + * @size length of the data contents. + * @id kernel load data identifier + * @description a text description of what was loaded, @id-specific + * Return 0 if permission is granted. + * This must be paired with a prior @kernel_load_data call that had + * @contents set to true. * @kernel_read_file: * Read a file specified by userspace. * @file contains the file structure pointing to the file being read diff --git a/include/linux/security.h b/include/linux/security.h index 42df0d9b4c37..51c8e4e6b7cc 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -387,7 +387,10 @@ void security_cred_getsecid(const struct cred *c, u32 *secid); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); -int security_kernel_load_data(enum kernel_load_data_id id); +int security_kernel_load_data(enum kernel_load_data_id id, bool contents); +int security_kernel_post_load_data(char *buf, loff_t size, + enum kernel_load_data_id id, + char *description); int security_kernel_read_file(struct file *file, enum kernel_read_file_id id); int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); @@ -1014,7 +1017,14 @@ static inline int security_kernel_module_request(char *kmod_name) return 0; } -static inline int security_kernel_load_data(enum kernel_load_data_id id) +static inline int security_kernel_load_data(enum kernel_load_data_id id, bool contents) +{ + return 0; +} + +static inline int security_kernel_post_load_data(char *buf, loff_t size, + enum kernel_load_data_id id, + char *description) { return 0; } -- cgit v1.2.3 From 2039bda1fa8dad3f4275b29eeaffef545bcbc85d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:23 -0700 Subject: LSM: Add "contents" flag to kernel_read_file hook As with the kernel_load_data LSM hook, add a "contents" flag to the kernel_read_file LSM hook that indicates whether the LSM can expect a matching call to the kernel_post_read_file LSM hook with the full contents of the file. With the coming addition of partial file read support for kernel_read_file*() API, the LSM will no longer be able to always see the entire contents of a file during the read calls. For cases where the LSM must read examine the complete file contents, it will need to do so on its own every time the kernel_read_file hook is called with contents=false (or reject such cases). Adjust all existing LSMs to retain existing behavior. Signed-off-by: Kees Cook Reviewed-by: Mimi Zohar Link: https://lore.kernel.org/r/20201002173828.2099543-12-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/ima.h | 6 ++++-- include/linux/lsm_hook_defs.h | 2 +- include/linux/lsm_hooks.h | 3 +++ include/linux/security.h | 6 ++++-- 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index af9fb8c5f16a..8fa7bcfb2da2 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -23,7 +23,8 @@ extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); extern int ima_load_data(enum kernel_load_data_id id, bool contents); extern int ima_post_load_data(char *buf, loff_t size, enum kernel_load_data_id id, char *description); -extern int ima_read_file(struct file *file, enum kernel_read_file_id id); +extern int ima_read_file(struct file *file, enum kernel_read_file_id id, + bool contents); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); @@ -92,7 +93,8 @@ static inline int ima_post_load_data(char *buf, loff_t size, return 0; } -static inline int ima_read_file(struct file *file, enum kernel_read_file_id id) +static inline int ima_read_file(struct file *file, enum kernel_read_file_id id, + bool contents) { return 0; } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 83c6f1f5cc1e..d67cb3502310 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -188,7 +188,7 @@ LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id id, bool contents) LSM_HOOK(int, 0, kernel_post_load_data, char *buf, loff_t size, enum kernel_read_file_id id, char *description) LSM_HOOK(int, 0, kernel_read_file, struct file *file, - enum kernel_read_file_id id) + enum kernel_read_file_id id, bool contents) LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old, diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 6bb4f1a0158c..8814e3d5952d 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -651,6 +651,7 @@ * @file contains the file structure pointing to the file being read * by the kernel. * @id kernel read file identifier + * @contents if a subsequent @kernel_post_read_file will be called. * Return 0 if permission is granted. * @kernel_post_read_file: * Read a file specified by userspace. @@ -659,6 +660,8 @@ * @buf pointer to buffer containing the file contents. * @size length of the file contents. * @id kernel read file identifier + * This must be paired with a prior @kernel_read_file call that had + * @contents set to true. * Return 0 if permission is granted. * @task_fix_setuid: * Update the module's state after setting one or more of the user diff --git a/include/linux/security.h b/include/linux/security.h index 51c8e4e6b7cc..bc2725491560 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -391,7 +391,8 @@ int security_kernel_load_data(enum kernel_load_data_id id, bool contents); int security_kernel_post_load_data(char *buf, loff_t size, enum kernel_load_data_id id, char *description); -int security_kernel_read_file(struct file *file, enum kernel_read_file_id id); +int security_kernel_read_file(struct file *file, enum kernel_read_file_id id, + bool contents); int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); int security_task_fix_setuid(struct cred *new, const struct cred *old, @@ -1030,7 +1031,8 @@ static inline int security_kernel_post_load_data(char *buf, loff_t size, } static inline int security_kernel_read_file(struct file *file, - enum kernel_read_file_id id) + enum kernel_read_file_id id, + bool contents) { return 0; } -- cgit v1.2.3 From 0fa8e084648779eeb8929ae004301b3acf3bad84 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Oct 2020 10:38:25 -0700 Subject: fs/kernel_file_read: Add "offset" arg for partial reads To perform partial reads, callers of kernel_read_file*() must have a non-NULL file_size argument and a preallocated buffer. The new "offset" argument can then be used to seek to specific locations in the file to fill the buffer to, at most, "buf_size" per call. Where possible, the LSM hooks can report whether a full file has been read or not so that the contents can be reasoned about. Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201002173828.2099543-14-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel_read_file.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h index 023293eaf948..575ffa1031d3 100644 --- a/include/linux/kernel_read_file.h +++ b/include/linux/kernel_read_file.h @@ -35,19 +35,19 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) return kernel_read_file_str[id]; } -int kernel_read_file(struct file *file, +int kernel_read_file(struct file *file, loff_t offset, void **buf, size_t buf_size, size_t *file_size, enum kernel_read_file_id id); -int kernel_read_file_from_path(const char *path, +int kernel_read_file_from_path(const char *path, loff_t offset, void **buf, size_t buf_size, size_t *file_size, enum kernel_read_file_id id); -int kernel_read_file_from_path_initns(const char *path, +int kernel_read_file_from_path_initns(const char *path, loff_t offset, void **buf, size_t buf_size, size_t *file_size, enum kernel_read_file_id id); -int kernel_read_file_from_fd(int fd, +int kernel_read_file_from_fd(int fd, loff_t offset, void **buf, size_t buf_size, size_t *file_size, enum kernel_read_file_id id); -- cgit v1.2.3 From 59cdb23ca2dfef3b93411d1105409dfe9cd1f62f Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Fri, 2 Oct 2020 10:38:27 -0700 Subject: firmware: Add request_partial_firmware_into_buf() Add request_partial_firmware_into_buf() to allow for portions of a firmware file to be read into a buffer. This is needed when large firmware must be loaded in portions from a file on memory constrained systems. Signed-off-by: Scott Branden Co-developed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201002173828.2099543-16-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index cb3e2c06ed8a..c15acadc6cf4 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -53,6 +53,9 @@ int request_firmware_direct(const struct firmware **fw, const char *name, struct device *device); int request_firmware_into_buf(const struct firmware **firmware_p, const char *name, struct device *device, void *buf, size_t size); +int request_partial_firmware_into_buf(const struct firmware **firmware_p, + const char *name, struct device *device, + void *buf, size_t size, size_t offset); void release_firmware(const struct firmware *fw); #else @@ -102,6 +105,15 @@ static inline int request_firmware_into_buf(const struct firmware **firmware_p, return -EINVAL; } +static inline int request_partial_firmware_into_buf + (const struct firmware **firmware_p, + const char *name, + struct device *device, + void *buf, size_t size, size_t offset) +{ + return -EINVAL; +} + #endif int firmware_request_cache(struct device *device, const char *name); -- cgit v1.2.3 From 10ed16662da9e28a33b6c991c36c6b323b03dd5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 18:06:18 +0200 Subject: block: add a bdget_part helper All remaining callers of bdget() outside of fs/block_dev.c want to get a reference to the struct block_device for a given struct hd_struct. Add a helper just for that and then mark bdget static. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5a3e1a4c2f7..cf80e61b4c5e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2003,7 +2003,7 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void blkdev_put(struct block_device *bdev, fmode_t mode); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget(dev_t); +struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); -- cgit v1.2.3 From 07560151db960d26b425410d6fd778e6757a5da2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 15 Sep 2020 20:53:13 -0700 Subject: block: make bio_crypt_clone() able to fail bio_crypt_clone() assumes its gfp_mask argument always includes __GFP_DIRECT_RECLAIM, so that the mempool_alloc() will always succeed. However, bio_crypt_clone() might be called with GFP_ATOMIC via setup_clone() in drivers/md/dm-rq.c, or with GFP_NOWAIT via kcryptd_io_read() in drivers/md/dm-crypt.c. Neither case is currently reachable with a bio that actually has an encryption context. However, it's fragile to rely on this. Just make bio_crypt_clone() able to fail, analogous to bio_integrity_clone(). Reported-by: Miaohe Lin Signed-off-by: Eric Biggers Reviewed-by: Mike Snitzer Reviewed-by: Satya Tangirala Cc: Satya Tangirala Signed-off-by: Jens Axboe --- include/linux/blk-crypto.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index e82342907f2b..69b24fe92cbf 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -112,12 +112,24 @@ static inline bool bio_has_crypt_ctx(struct bio *bio) #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ -void __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); -static inline void bio_crypt_clone(struct bio *dst, struct bio *src, - gfp_t gfp_mask) +int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); +/** + * bio_crypt_clone - clone bio encryption context + * @dst: destination bio + * @src: source bio + * @gfp_mask: memory allocation flags + * + * If @src has an encryption context, clone it to @dst. + * + * Return: 0 on success, -ENOMEM if out of memory. -ENOMEM is only possible if + * @gfp_mask doesn't include %__GFP_DIRECT_RECLAIM. + */ +static inline int bio_crypt_clone(struct bio *dst, struct bio *src, + gfp_t gfp_mask) { if (bio_has_crypt_ctx(src)) - __bio_crypt_clone(dst, src, gfp_mask); + return __bio_crypt_clone(dst, src, gfp_mask); + return 0; } #endif /* __LINUX_BLK_CRYPTO_H */ -- cgit v1.2.3 From 3a6f0fb7b8eb5ef0447da1332225b8c87ee207d0 Mon Sep 17 00:00:00 2001 From: Laxminath Kasam Date: Mon, 5 Oct 2020 19:47:19 +0530 Subject: regmap: irq: Add support to clear ack registers For particular codec HWs have requirement to toggle interrupt clear register twice 0->1->0. To accommodate it, need to add one more field (clear_ack) in the regmap_irq struct and update regmap-irq driver to support it. Signed-off-by: Laxminath Kasam Link: https://lore.kernel.org/r/1601907440-13373-1-git-send-email-lkasam@codeaurora.org Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index a35ec0a0d6e0..e7834d98207f 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1355,6 +1355,7 @@ struct regmap_irq_sub_irq_map { * @mask_invert: Inverted mask register: cleared bits are masked out. * @use_ack: Use @ack register even if it is zero. * @ack_invert: Inverted ack register: cleared bits for ack. + * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. * @type_invert: Invert the type flags. * @type_in_mask: Use the mask registers for controlling irq type. For @@ -1403,6 +1404,7 @@ struct regmap_irq_chip { bool mask_invert:1; bool use_ack:1; bool ack_invert:1; + bool clear_ack:1; bool wake_invert:1; bool runtime_pm:1; bool type_invert:1; -- cgit v1.2.3 From 07da1223ec939982497db3caccd6215b55acc35c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 4 Oct 2020 18:43:37 +0300 Subject: lib/scatterlist: Add support in dynamic allocation of SG table from pages Extend __sg_alloc_table_from_pages to support dynamic allocation of SG table from pages. It should be used by drivers that can't supply all the pages at one time. This function returns the last populated SGE in the table. Users should pass it as an argument to the function from the second call and forward. As before, nents will be equal to the number of populated SGEs (chunks). With this new extension, drivers can benefit the optimization of merging contiguous pages without a need to allocate all pages in advance and hold them in a large buffer. E.g. with the Infiniband driver that allocates a single page for hold the pages. For 1TB memory registration, the temporary buffer would consume only 4KB, instead of 2GB. Link: https://lore.kernel.org/r/20201004154340.1080481-2-leon@kernel.org Signed-off-by: Maor Gottlieb Reviewed-by: Christoph Hellwig Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/scatterlist.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 45cf7b69d852..36c47e7e66a2 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, #define for_each_sgtable_dma_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) +static inline void __sg_chain(struct scatterlist *chain_sg, + struct scatterlist *sgl) +{ + /* + * offset and length are unused for chain entry. Clear them. + */ + chain_sg->offset = 0; + chain_sg->length = 0; + + /* + * Set lowest bit to indicate a link pointer, and make sure to clear + * the termination bit if it happens to be set. + */ + chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END; +} + /** * sg_chain - Chain two sglists together * @prv: First scatterlist @@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { - /* - * offset and length are unused for chain entry. Clear them. - */ - prv[prv_nents - 1].offset = 0; - prv[prv_nents - 1].length = 0; - - /* - * Set lowest bit to indicate a link pointer, and make sure to clear - * the termination bit if it happens to be set. - */ - prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN) - & ~SG_END; + __sg_chain(&prv[prv_nents - 1], sgl); } /** @@ -286,10 +291,11 @@ void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - gfp_t gfp_mask); +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + struct scatterlist *prv, unsigned int left_pages, + gfp_t gfp_mask); int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); -- cgit v1.2.3 From 0a0f0d8be76dcd4390ff538e7060fda34db79717 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Sep 2020 15:31:03 +0200 Subject: dma-mapping: split Split out all the bits that are purely for dma_map_ops implementations and related code into a new header so that they don't get pulled into all the drivers. That also means the architecture specific is not pulled in by any more, which leads to a missing includes that were pulled in by the x86 or arm versions in a few not overly portable drivers. Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 158 +++++++++++++++++++++++++++++++++++++++++ include/linux/dma-mapping.h | 168 +------------------------------------------- 2 files changed, 160 insertions(+), 166 deletions(-) create mode 100644 include/linux/dma-map-ops.h (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h new file mode 100644 index 000000000000..4b4ba5bdcf6a --- /dev/null +++ b/include/linux/dma-map-ops.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This header is for implementations of dma_map_ops and related code. + * It should not be included in drivers just using the DMA API. + */ +#ifndef _LINUX_DMA_MAP_OPS_H +#define _LINUX_DMA_MAP_OPS_H + +#include + +struct dma_map_ops { + void *(*alloc)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs); + void (*free)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs); + struct page *(*alloc_pages)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + void *(*alloc_noncoherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + int (*mmap)(struct device *, struct vm_area_struct *, + void *, dma_addr_t, size_t, unsigned long attrs); + + int (*get_sgtable)(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); + + dma_addr_t (*map_page)(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + /* + * map_sg returns 0 on error and a value > 0 on success. + * It should never return a value < 0. + */ + int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); + void (*sync_single_for_device)(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir); + void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*cache_sync)(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction); + int (*dma_supported)(struct device *dev, u64 mask); + u64 (*get_required_mask)(struct device *dev); + size_t (*max_mapping_size)(struct device *dev); + unsigned long (*get_merge_boundary)(struct device *dev); +}; + +#ifdef CONFIG_DMA_OPS +#include + +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev->dma_ops) + return dev->dma_ops; + return get_arch_dma_ops(dev->bus); +} + +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) +{ + dev->dma_ops = dma_ops; +} +#else /* CONFIG_DMA_OPS */ +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return NULL; +} +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) +{ +} +#endif /* CONFIG_DMA_OPS */ + +#ifdef CONFIG_DMA_DECLARE_COHERENT +int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size); +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret); +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); + +void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle); +int dma_release_from_global_coherent(int order, void *vaddr); +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, + size_t size, int *ret); + +#else +static inline int dma_declare_coherent_memory(struct device *dev, + phys_addr_t phys_addr, dma_addr_t device_addr, size_t size) +{ + return -ENOSYS; +} +#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) +#define dma_release_from_dev_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) + +static inline void *dma_alloc_from_global_coherent(struct device *dev, + ssize_t size, dma_addr_t *dma_handle) +{ + return NULL; +} +static inline int dma_release_from_global_coherent(int order, void *vaddr) +{ + return 0; +} +static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret) +{ + return 0; +} +#endif /* CONFIG_DMA_DECLARE_COHERENT */ + +#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent); +#else +static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, + u64 size, const struct iommu_ops *iommu, bool coherent) +{ +} +#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ + +#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS +void arch_teardown_dma_ops(struct device *dev); +#else +static inline void arch_teardown_dma_ops(struct device *dev) +{ +} +#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ + +extern const struct dma_map_ops dma_dummy_ops; + +#endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 7c77cd6f3604..9591cd482d7c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -62,72 +62,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -struct dma_map_ops { - void* (*alloc)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs); - void (*free)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs); - struct page *(*alloc_pages)(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, - gfp_t gfp); - void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, - dma_addr_t dma_handle, enum dma_data_direction dir); - void* (*alloc_noncoherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, - gfp_t gfp); - void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, enum dma_data_direction dir); - int (*mmap)(struct device *, struct vm_area_struct *, - void *, dma_addr_t, size_t, - unsigned long attrs); - - int (*get_sgtable)(struct device *dev, struct sg_table *sgt, void *, - dma_addr_t, size_t, unsigned long attrs); - - dma_addr_t (*map_page)(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - /* - * map_sg returns 0 on error and a value > 0 on success. - * It should never return a value < 0. - */ - int (*map_sg)(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_sg)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir, - unsigned long attrs); - dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - void (*sync_single_for_cpu)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir); - void (*sync_single_for_device)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir); - void (*sync_sg_for_cpu)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); - void (*sync_sg_for_device)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); - int (*dma_supported)(struct device *dev, u64 mask); - u64 (*get_required_mask)(struct device *dev); - size_t (*max_mapping_size)(struct device *dev); - unsigned long (*get_merge_boundary)(struct device *dev); -}; - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a @@ -140,79 +74,9 @@ struct dma_map_ops { */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) -extern const struct dma_map_ops dma_virt_ops; -extern const struct dma_map_ops dma_dummy_ops; - #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -#ifdef CONFIG_DMA_DECLARE_COHERENT -/* - * These three functions are only for dma allocator. - * Don't use them in device drivers. - */ -int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret); -int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); - -int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, size_t size, int *ret); - -void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle); -int dma_release_from_global_coherent(int order, void *vaddr); -int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, - size_t size, int *ret); - -#else -#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) -#define dma_release_from_dev_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) - -static inline void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle) -{ - return NULL; -} - -static inline int dma_release_from_global_coherent(int order, void *vaddr) -{ - return 0; -} - -static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, - void *cpu_addr, size_t size, - int *ret) -{ - return 0; -} -#endif /* CONFIG_DMA_DECLARE_COHERENT */ - #ifdef CONFIG_HAS_DMA -#include - -#ifdef CONFIG_DMA_OPS -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) -{ - if (dev->dma_ops) - return dev->dma_ops; - return get_arch_dma_ops(dev->bus); -} - -static inline void set_dma_ops(struct device *dev, - const struct dma_map_ops *dma_ops) -{ - dev->dma_ops = dma_ops; -} -#else /* CONFIG_DMA_OPS */ -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) -{ - return NULL; -} -static inline void set_dma_ops(struct device *dev, - const struct dma_map_ops *dma_ops) -{ -} -#endif /* CONFIG_DMA_OPS */ - static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { debug_dma_mapping_error(dev, dma_addr); @@ -595,24 +459,6 @@ static inline bool dma_addressing_limited(struct device *dev) dma_get_required_mask(dev); } -#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); -#else -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, const struct iommu_ops *iommu, bool coherent) -{ -} -#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ - -#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS -void arch_teardown_dma_ops(struct device *dev); -#else -static inline void arch_teardown_dma_ops(struct device *dev) -{ -} -#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) @@ -672,18 +518,6 @@ static inline int dma_get_cache_alignment(void) return 1; } -#ifdef CONFIG_DMA_DECLARE_COHERENT -int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size); -#else -static inline int -dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size) -{ - return -ENOSYS; -} -#endif /* CONFIG_DMA_DECLARE_COHERENT */ - static inline void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -741,4 +575,6 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); +extern const struct dma_map_ops dma_virt_ops; + #endif /* _LINUX_DMA_MAPPING_H */ -- cgit v1.2.3 From 8df4051232152a0520ab3035c2d96f33083c2d6a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:29:47 +0200 Subject: dma-contiguous: remove dma_declare_contiguous dma_declare_contiguous is a trivial wrapper around dma_contiguous_reserve_area and just has a single caller. Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index fe55e004f1f4..62fd55d07235 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -83,31 +83,6 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, phys_addr_t limit, struct cma **res_cma, bool fixed); -/** - * dma_declare_contiguous() - reserve area for contiguous memory handling - * for particular device - * @dev: Pointer to device structure. - * @size: Size of the reserved memory. - * @base: Start address of the reserved memory (optional, 0 for any). - * @limit: End address of the reserved memory (optional, 0 for any). - * - * This function reserves memory for specified device. It should be - * called by board specific code when early allocator (memblock or bootmem) - * is still activate. - */ - -static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) -{ - struct cma *cma; - int ret; - ret = dma_contiguous_reserve_area(size, base, limit, &cma, true); - if (ret == 0) - dev_set_cma_area(dev, cma); - - return ret; -} - struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order, bool no_warn); bool dma_release_from_contiguous(struct device *dev, struct page *pages, @@ -135,13 +110,6 @@ static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base return -ENOSYS; } -static inline -int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) -{ - return -ENOSYS; -} - static inline struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order, bool no_warn) -- cgit v1.2.3 From 5af638931eb374aa0894d8343cee72f50307ef20 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:56:03 +0200 Subject: dma-contiguous: remove dev_set_cma_area dev_set_cma_area contains a trivial assignment. It has just three callers that all have a non-NULL device and depend on CONFIG_DMA_CMA, so remove the wrapper. Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 62fd55d07235..41ec08d81bc3 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -66,12 +66,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return dma_contiguous_default_area; } -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) -{ - if (dev) - dev->cma_area = cma; -} - static inline void dma_contiguous_set_default(struct cma *cma) { dma_contiguous_default_area = cma; @@ -97,8 +91,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return NULL; } -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { } - static inline void dma_contiguous_set_default(struct cma *cma) { } static inline void dma_contiguous_reserve(phys_addr_t limit) { } -- cgit v1.2.3 From 580a0cc9c3f662e0b10136bc8af1e672e472806f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:56:40 +0200 Subject: dma-contiguous: remove dma_contiguous_set_default dma_contiguous_set_default contains a trivial assignment, and has a single caller that is compiled if CONFIG_CMA_DMA is enabled. Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 41ec08d81bc3..f9ce1ee58d41 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -66,11 +66,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return dma_contiguous_default_area; } -static inline void dma_contiguous_set_default(struct cma *cma) -{ - dma_contiguous_default_area = cma; -} - void dma_contiguous_reserve(phys_addr_t addr_limit); int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, @@ -91,8 +86,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return NULL; } -static inline void dma_contiguous_set_default(struct cma *cma) { } - static inline void dma_contiguous_reserve(phys_addr_t limit) { } static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, -- cgit v1.2.3 From 0b1abd1fb7efafc25231c54a67c6fbb3d3127efd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:56:52 +0200 Subject: dma-mapping: merge into Merge dma-contiguous.h into dma-map-ops.h, after removing the comment describing the contiguous allocator into kernel/dma/contigous.c. Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 135 ----------------------------------------- include/linux/dma-map-ops.h | 65 ++++++++++++++++++++ 2 files changed, 65 insertions(+), 135 deletions(-) delete mode 100644 include/linux/dma-contiguous.h (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h deleted file mode 100644 index f9ce1ee58d41..000000000000 --- a/include/linux/dma-contiguous.h +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef __LINUX_CMA_H -#define __LINUX_CMA_H - -/* - * Contiguous Memory Allocator for DMA mapping framework - * Copyright (c) 2010-2011 by Samsung Electronics. - * Written by: - * Marek Szyprowski - * Michal Nazarewicz - */ - -/* - * Contiguous Memory Allocator - * - * The Contiguous Memory Allocator (CMA) makes it possible to - * allocate big contiguous chunks of memory after the system has - * booted. - * - * Why is it needed? - * - * Various devices on embedded systems have no scatter-getter and/or - * IO map support and require contiguous blocks of memory to - * operate. They include devices such as cameras, hardware video - * coders, etc. - * - * Such devices often require big memory buffers (a full HD frame - * is, for instance, more then 2 mega pixels large, i.e. more than 6 - * MB of memory), which makes mechanisms such as kmalloc() or - * alloc_page() ineffective. - * - * At the same time, a solution where a big memory region is - * reserved for a device is suboptimal since often more memory is - * reserved then strictly required and, moreover, the memory is - * inaccessible to page system even if device drivers don't use it. - * - * CMA tries to solve this issue by operating on memory regions - * where only movable pages can be allocated from. This way, kernel - * can use the memory for pagecache and when device driver requests - * it, allocated pages can be migrated. - * - * Driver usage - * - * CMA should not be used by the device drivers directly. It is - * only a helper framework for dma-mapping subsystem. - * - * For more information, see kernel-docs in kernel/dma/contiguous.c - */ - -#ifdef __KERNEL__ - -#include -#include - -struct cma; -struct page; - -#ifdef CONFIG_DMA_CMA - -extern struct cma *dma_contiguous_default_area; - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} - -void dma_contiguous_reserve(phys_addr_t addr_limit); - -int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed); - -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn); -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count); -struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); -void dma_free_contiguous(struct device *dev, struct page *page, size_t size); - -#else - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - return NULL; -} - -static inline void dma_contiguous_reserve(phys_addr_t limit) { } - -static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed) -{ - return -ENOSYS; -} - -static inline -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn) -{ - return NULL; -} - -static inline -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) -{ - return false; -} - -/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ -static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, - gfp_t gfp) -{ - return NULL; -} - -static inline void dma_free_contiguous(struct device *dev, struct page *page, - size_t size) -{ - __free_pages(page, get_order(size)); -} - -#endif - -#ifdef CONFIG_DMA_PERNUMA_CMA -void dma_pernuma_cma_reserve(void); -#else -static inline void dma_pernuma_cma_reserve(void) { } -#endif - -#endif - -#endif diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4b4ba5bdcf6a..474fc81bd492 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -8,6 +8,8 @@ #include +struct cma; + struct dma_map_ops { void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, @@ -94,6 +96,69 @@ static inline void set_dma_ops(struct device *dev, } #endif /* CONFIG_DMA_OPS */ +#ifdef CONFIG_DMA_CMA +extern struct cma *dma_contiguous_default_area; + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +void dma_contiguous_reserve(phys_addr_t addr_limit); +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma, bool fixed); + +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, + unsigned int order, bool no_warn); +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count); +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); +void dma_free_contiguous(struct device *dev, struct page *page, size_t size); +#else /* CONFIG_DMA_CMA */ +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + return NULL; +} +static inline void dma_contiguous_reserve(phys_addr_t limit) +{ +} +static inline int dma_contiguous_reserve_area(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, struct cma **res_cma, + bool fixed) +{ + return -ENOSYS; +} +static inline struct page *dma_alloc_from_contiguous(struct device *dev, + size_t count, unsigned int order, bool no_warn) +{ + return NULL; +} +static inline bool dma_release_from_contiguous(struct device *dev, + struct page *pages, int count) +{ + return false; +} +/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ +static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, + gfp_t gfp) +{ + return NULL; +} +static inline void dma_free_contiguous(struct device *dev, struct page *page, + size_t size) +{ + __free_pages(page, get_order(size)); +} +#endif /* CONFIG_DMA_CMA*/ + +#ifdef CONFIG_DMA_PERNUMA_CMA +void dma_pernuma_cma_reserve(void); +#else +static inline void dma_pernuma_cma_reserve(void) { } +#endif /* CONFIG_DMA_PERNUMA_CMA */ + #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); -- cgit v1.2.3 From 5db5d93089880c3cc9e83ca8bba68a5502e92dfe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 11:04:43 +0200 Subject: dma-mapping: remove Just provide a weak default definition of dma_contiguous_early_fixup and let arm override it. Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 474fc81bd492..7912f5d00ed9 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -116,6 +116,8 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); void dma_free_contiguous(struct device *dev, struct page *page, size_t size); + +void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); #else /* CONFIG_DMA_CMA */ static inline struct cma *dev_get_cma_area(struct device *dev) { -- cgit v1.2.3 From a1fd09e8e6ae35228ecc7c1e4bfff1fd725f78a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:12:44 +0200 Subject: dma-mapping: move dma-debug.h to kernel/dma/ Most of dma-debug.h is not required by anything outside of kernel/dma. Move the four declarations needed by dma-mappin.h or dma-ops providers into dma-mapping.h and dma-map-ops.h, and move the remainder of the file to kernel/dma/debug.h. Signed-off-by: Christoph Hellwig --- include/linux/dma-debug.h | 160 -------------------------------------------- include/linux/dma-map-ops.h | 12 ++++ include/linux/dma-mapping.h | 16 ++++- 3 files changed, 27 insertions(+), 161 deletions(-) delete mode 100644 include/linux/dma-debug.h (limited to 'include/linux') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h deleted file mode 100644 index 7b3b04ba60f3..000000000000 --- a/include/linux/dma-debug.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008 Advanced Micro Devices, Inc. - * - * Author: Joerg Roedel - */ - -#ifndef __DMA_DEBUG_H -#define __DMA_DEBUG_H - -#include - -struct device; -struct scatterlist; -struct bus_type; - -#ifdef CONFIG_DMA_API_DEBUG - -extern void dma_debug_add_bus(struct bus_type *bus); - -extern void debug_dma_map_single(struct device *dev, const void *addr, - unsigned long len); - -extern void debug_dma_map_page(struct device *dev, struct page *page, - size_t offset, size_t size, - int direction, dma_addr_t dma_addr); - -extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); - -extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction); - -extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction); - -extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir); - -extern void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt); - -extern void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr); - -extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr, - size_t size, int direction, - dma_addr_t dma_addr); - -extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction); - -extern void debug_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - int direction); - -extern void debug_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, - size_t size, int direction); - -extern void debug_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, - int nelems, int direction); - -extern void debug_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, - int nelems, int direction); - -extern void debug_dma_dump_mappings(struct device *dev); - -#else /* CONFIG_DMA_API_DEBUG */ - -static inline void dma_debug_add_bus(struct bus_type *bus) -{ -} - -static inline void debug_dma_map_single(struct device *dev, const void *addr, - unsigned long len) -{ -} - -static inline void debug_dma_map_page(struct device *dev, struct page *page, - size_t offset, size_t size, - int direction, dma_addr_t dma_addr) -{ -} - -static inline void debug_dma_mapping_error(struct device *dev, - dma_addr_t dma_addr) -{ -} - -static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction) -{ -} - -static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) -{ -} - -static inline void debug_dma_unmap_sg(struct device *dev, - struct scatterlist *sglist, - int nelems, int dir) -{ -} - -static inline void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) -{ -} - -static inline void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr) -{ -} - -static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr, - size_t size, int direction, - dma_addr_t dma_addr) -{ -} - -static inline void debug_dma_unmap_resource(struct device *dev, - dma_addr_t dma_addr, size_t size, - int direction) -{ -} - -static inline void debug_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, - size_t size, int direction) -{ -} - -static inline void debug_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, - size_t size, int direction) -{ -} - -static inline void debug_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, - int nelems, int direction) -{ -} - -static inline void debug_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, - int nelems, int direction) -{ -} - -static inline void debug_dma_dump_mappings(struct device *dev) -{ -} - -#endif /* CONFIG_DMA_API_DEBUG */ - -#endif /* __DMA_DEBUG_H */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 7912f5d00ed9..9891def42da7 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -220,6 +220,18 @@ static inline void arch_teardown_dma_ops(struct device *dev) } #endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ +#ifdef CONFIG_DMA_API_DEBUG +void dma_debug_add_bus(struct bus_type *bus); +void debug_dma_dump_mappings(struct device *dev); +#else +static inline void dma_debug_add_bus(struct bus_type *bus) +{ +} +static inline void debug_dma_dump_mappings(struct device *dev) +{ +} +#endif /* CONFIG_DMA_API_DEBUG */ + extern const struct dma_map_ops dma_dummy_ops; #endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9591cd482d7c..3f029afdc9dc 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -76,6 +75,21 @@ #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +#ifdef CONFIG_DMA_API_DEBUG +void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); +void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len); +#else +static inline void debug_dma_mapping_error(struct device *dev, + dma_addr_t dma_addr) +{ +} +static inline void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len) +{ +} +#endif /* CONFIG_DMA_API_DEBUG */ + #ifdef CONFIG_HAS_DMA static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { -- cgit v1.2.3 From 19c65c3d30bb5a97170e425979d2e44ab2096c7d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Sep 2020 15:34:22 +0200 Subject: dma-mapping: move large parts of to kernel/dma Most of the dma_direct symbols should only be used by direct.c and mapping.c, so move them to kernel/dma. In fact more of dma-direct.h should eventually move, but that will require more coordination with other subsystems. Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 106 --------------------------------------------- 1 file changed, 106 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 38ed3b55034d..a2d6640c42c0 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -120,114 +120,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir); -int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs); -bool dma_direct_can_mmap(struct device *dev); -int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); -bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs); dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir, unsigned long attrs); -size_t dma_direct_max_mapping_size(struct device *dev); -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs); -void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_unmap_sg(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -static inline void dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(paddr, size, dir); -} - -static inline void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - - if (!dev_is_dma_coherent(dev)) { - arch_sync_dma_for_cpu(paddr, size, dir); - arch_sync_dma_for_cpu_all(); - } - - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); - - if (dir == DMA_FROM_DEVICE) - arch_dma_mark_clean(paddr, size); -} - -static inline dma_addr_t dma_direct_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dma_addr = phys_to_dma(dev, phys); - - if (unlikely(swiotlb_force == SWIOTLB_FORCE)) - return swiotlb_map(dev, phys, size, dir, attrs); - - if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - if (swiotlb_force != SWIOTLB_NO_FORCE) - return swiotlb_map(dev, phys, size, dir, attrs); - - dev_WARN_ONCE(dev, 1, - "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - return DMA_MAPPING_ERROR; - } - - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, size, dir); - return dma_addr; -} - -static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = dma_to_phys(dev, addr); - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); - - if (unlikely(is_swiotlb_buffer(phys))) - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); -} #endif /* _LINUX_DMA_DIRECT_H */ -- cgit v1.2.3 From 9f4df96b8781e40d0cb0e32eb3d1f6d87375adf9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Sep 2020 15:36:11 +0200 Subject: dma-mapping: merge into Move more nitty gritty DMA implementation details into the common internal header. Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 2 +- include/linux/dma-map-ops.h | 102 +++++++++++++++++++++++++++++++++++++ include/linux/dma-noncoherent.h | 109 ---------------------------------------- 3 files changed, 103 insertions(+), 110 deletions(-) delete mode 100644 include/linux/dma-noncoherent.h (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index a2d6640c42c0..18aade195884 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -7,7 +7,7 @@ #define _LINUX_DMA_DIRECT_H 1 #include -#include +#include #include /* for min_low_pfn */ #include #include diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9891def42da7..33c6e24707a9 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -7,6 +7,7 @@ #define _LINUX_DMA_MAP_OPS_H #include +#include struct cma; @@ -202,6 +203,107 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_DECLARE_COHERENT */ +#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H +#include +#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return dev->dma_coherent; +} +#else +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return true; +} +#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ + +/* + * Check if an allocation needs to be marked uncached to be coherent. + */ +static __always_inline bool dma_alloc_need_uncached(struct device *dev, + unsigned long attrs) +{ + if (dev_is_dma_coherent(dev)) + return false; + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) + return false; + return true; +} + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs); +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs); + +#ifdef CONFIG_MMU +/* + * Page protection so that devices that can't snoop CPU caches can use the + * memory coherently. We default to pgprot_noncached which is usually used + * for ioremap as a safe bet, but architectures can override this with less + * strict semantics if possible. + */ +#ifndef pgprot_dmacoherent +#define pgprot_dmacoherent(prot) pgprot_noncached(prot) +#endif + +pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); +#else +static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + return prot; /* no protection bits supported without page tables */ +} +#endif /* CONFIG_MMU */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL +void arch_sync_dma_for_cpu_all(void); +#else +static inline void arch_sync_dma_for_cpu_all(void) +{ +} +#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ + +#ifdef CONFIG_ARCH_HAS_DMA_PREP_COHERENT +void arch_dma_prep_coherent(struct page *page, size_t size); +#else +static inline void arch_dma_prep_coherent(struct page *page, size_t size) +{ +} +#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ + +#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN +void arch_dma_mark_clean(phys_addr_t paddr, size_t size); +#else +static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) +{ +} +#endif /* ARCH_HAS_DMA_MARK_CLEAN */ + +void *arch_dma_set_uncached(void *addr, size_t size); +void arch_dma_clear_uncached(void *addr, size_t size); + #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h deleted file mode 100644 index e61283e06576..000000000000 --- a/include/linux/dma-noncoherent.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_DMA_NONCOHERENT_H -#define _LINUX_DMA_NONCOHERENT_H 1 - -#include -#include - -#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H -#include -#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -static inline bool dev_is_dma_coherent(struct device *dev) -{ - return dev->dma_coherent; -} -#else -static inline bool dev_is_dma_coherent(struct device *dev) -{ - return true; -} -#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ - -/* - * Check if an allocation needs to be marked uncached to be coherent. - */ -static __always_inline bool dma_alloc_need_uncached(struct device *dev, - unsigned long attrs) -{ - if (dev_is_dma_coherent(dev)) - return false; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) - return false; - return true; -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs); -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs); - -#ifdef CONFIG_MMU -/* - * Page protection so that devices that can't snoop CPU caches can use the - * memory coherently. We default to pgprot_noncached which is usually used - * for ioremap as a safe bet, but architectures can override this with less - * strict semantics if possible. - */ -#ifndef pgprot_dmacoherent -#define pgprot_dmacoherent(prot) pgprot_noncached(prot) -#endif - -pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); -#else -static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - return prot; /* no protection bits supported without page tables */ -} -#endif /* CONFIG_MMU */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir); -#else -static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) -{ -} -#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir); -#else -static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) -{ -} -#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL -void arch_sync_dma_for_cpu_all(void); -#else -static inline void arch_sync_dma_for_cpu_all(void) -{ -} -#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ - -#ifdef CONFIG_ARCH_HAS_DMA_PREP_COHERENT -void arch_dma_prep_coherent(struct page *page, size_t size); -#else -static inline void arch_dma_prep_coherent(struct page *page, size_t size) -{ -} -#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ - -#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN -void arch_dma_mark_clean(phys_addr_t paddr, size_t size); -#else -static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) -{ -} -#endif /* ARCH_HAS_DMA_MARK_CLEAN */ - -void *arch_dma_set_uncached(void *addr, size_t size); -void arch_dma_clear_uncached(void *addr, size_t size); - -#endif /* _LINUX_DMA_NONCOHERENT_H */ -- cgit v1.2.3 From 5de15b610f785f0e188fefb707f0b19de156968a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 3 Oct 2020 23:23:20 +0300 Subject: mtd: hyperbus: add Renesas RPC-IF driver Add the HyperFLash driver for the Renesas RPC-IF. It's the "front end" driver using the "back end" APIs in the main driver to talk to the real hardware. Signed-off-by: Sergei Shtylyov Signed-off-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/78abb851-2beb-fe7d-87e5-ce58ee877d35@gmail.com --- include/linux/mtd/hyperbus.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/hyperbus.h b/include/linux/mtd/hyperbus.h index d8cb1aec826d..0ce612428aea 100644 --- a/include/linux/mtd/hyperbus.h +++ b/include/linux/mtd/hyperbus.h @@ -8,6 +8,17 @@ #include +/* HyperBus command bits */ +#define HYPERBUS_RW 0x80 /* R/W# */ +#define HYPERBUS_RW_WRITE 0 +#define HYPERBUS_RW_READ 0x80 +#define HYPERBUS_AS 0x40 /* Address Space */ +#define HYPERBUS_AS_MEM 0 +#define HYPERBUS_AS_REG 0x40 +#define HYPERBUS_BT 0x20 /* Burst Type */ +#define HYPERBUS_BT_WRAPPED 0 +#define HYPERBUS_BT_LINEAR 0x20 + enum hyperbus_memtype { HYPERFLASH, HYPERRAM, -- cgit v1.2.3 From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2020 20:40:16 -0700 Subject: x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}() In reaction to a proposal to introduce a memcpy_mcsafe_fast() implementation Linus points out that memcpy_mcsafe() is poorly named relative to communicating the scope of the interface. Specifically what addresses are valid to pass as source, destination, and what faults / exceptions are handled. Of particular concern is that even though x86 might be able to handle the semantics of copy_mc_to_user() with its common copy_user_generic() implementation other archs likely need / want an explicit path for this case: On Fri, May 1, 2020 at 11:28 AM Linus Torvalds wrote: > > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams wrote: > > > > However now I see that copy_user_generic() works for the wrong reason. > > It works because the exception on the source address due to poison > > looks no different than a write fault on the user address to the > > caller, it's still just a short copy. So it makes copy_to_user() work > > for the wrong reason relative to the name. > > Right. > > And it won't work that way on other architectures. On x86, we have a > generic function that can take faults on either side, and we use it > for both cases (and for the "in_user" case too), but that's an > artifact of the architecture oddity. > > In fact, it's probably wrong even on x86 - because it can hide bugs - > but writing those things is painful enough that everybody prefers > having just one function. Replace a single top-level memcpy_mcsafe() with either copy_mc_to_user(), or copy_mc_to_kernel(). Introduce an x86 copy_mc_fragile() name as the rename for the low-level x86 implementation formerly named memcpy_mcsafe(). It is used as the slow / careful backend that is supplanted by a fast copy_mc_generic() in a follow-on patch. One side-effect of this reorganization is that separating copy_mc_64.S to its own file means that perf no longer needs to track dependencies for its memcpy_64.S benchmarks. [ bp: Massage a bit. ] Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Michael Ellerman Cc: Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/string.h | 9 +-------- include/linux/uaccess.h | 13 +++++++++++++ include/linux/uio.h | 10 +++++----- 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 9b7a0632e87a..b1f3894a0a3e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif -#ifndef __HAVE_ARCH_MEMCPY_MCSAFE -static inline __must_check unsigned long memcpy_mcsafe(void *dst, - const void *src, size_t cnt) -{ - memcpy(dst, src, cnt); - return 0; -} -#endif #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) { memcpy(dst, src, cnt); } #endif + void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 94b285411659..1ae36bc8db35 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -179,6 +179,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n) } #endif +#ifndef copy_mc_to_kernel +/* + * Without arch opt-in this generic copy_mc_to_kernel() will not handle + * #MC (or arch equivalent) during source read. + */ +static inline unsigned long __must_check +copy_mc_to_kernel(void *dst, const void *src, size_t cnt) +{ + memcpy(dst, src, cnt); + return 0; +} +#endif + static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; diff --git a/include/linux/uio.h b/include/linux/uio.h index 3835a8a8e9ea..f14410c678bd 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -185,10 +185,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i); +#ifdef CONFIG_ARCH_HAS_COPY_MC +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #else -#define _copy_to_iter_mcsafe _copy_to_iter +#define _copy_mc_to_iter _copy_to_iter #endif static __always_inline __must_check @@ -201,12 +201,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) } static __always_inline __must_check -size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) +size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, true))) return 0; else - return _copy_to_iter_mcsafe(addr, bytes, i); + return _copy_mc_to_iter(addr, bytes, i); } size_t iov_iter_zero(size_t bytes, struct iov_iter *); -- cgit v1.2.3 From 90428a8eb4947f9c7c905a178f3520dc7e2ee6d2 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Mon, 28 Sep 2020 10:02:01 +0530 Subject: genirq/PM: Introduce IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND flag An interrupt that is disabled/masked but set for wakeup may still need to be able to wake up the system from sleep states like "suspend to RAM". To that effect, introduce the IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND flag. If the irqchip have this flag set, the irq PM code will enable/unmask the irqs that are marked for wakeup, but that are in a disabled state. On resume, such irqs will be restored back to their disabled state. Suggested-by: Thomas Gleixner Signed-off-by: Maulik Shah [maz: commit message fix-up] Signed-off-by: Marc Zyngier Tested-by: Stephen Boyd Reviewed-by: Thomas Gleixner Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/1601267524-20199-4-git-send-email-mkshah@codeaurora.org --- include/linux/irq.h | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1b7f4dfee35b..a8b84b88e673 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -215,6 +215,8 @@ struct irq_data { * from actual interrupt context. * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call * irq_chip::irq_set_affinity() when deactivated. + * IRQD_IRQ_ENABLED_ON_SUSPEND - Interrupt is enabled on suspend by irq pm if + * irqchip have flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND set. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -240,6 +242,7 @@ enum { IRQD_MSI_NOMASK_QUIRK = (1 << 27), IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), + IRQD_IRQ_ENABLED_ON_SUSPEND = (1 << 30), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -319,6 +322,11 @@ static inline bool irqd_is_handle_enforce_irqctx(struct irq_data *d) return __irqd_to_state(d) & IRQD_HANDLE_ENFORCE_IRQCTX; } +static inline bool irqd_is_enabled_on_suspend(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_IRQ_ENABLED_ON_SUSPEND; +} + static inline bool irqd_is_wakeup_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_WAKEUP_STATE; @@ -545,27 +553,30 @@ struct irq_chip { /* * irq_chip specific flags * - * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type() - * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled - * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path - * IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks - * when irq enabled - * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip - * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask - * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode - * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs - * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips + * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type() + * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled + * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path + * IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks + * when irq enabled + * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip + * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask + * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode + * IRQCHIP_SUPPORTS_LEVEL_MSI: Chip can provide two doorbells for Level MSIs + * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips + * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs + * in the suspend path if they are in disabled state */ enum { - IRQCHIP_SET_TYPE_MASKED = (1 << 0), - IRQCHIP_EOI_IF_HANDLED = (1 << 1), - IRQCHIP_MASK_ON_SUSPEND = (1 << 2), - IRQCHIP_ONOFFLINE_ENABLED = (1 << 3), - IRQCHIP_SKIP_SET_WAKE = (1 << 4), - IRQCHIP_ONESHOT_SAFE = (1 << 5), - IRQCHIP_EOI_THREADED = (1 << 6), - IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), - IRQCHIP_SUPPORTS_NMI = (1 << 8), + IRQCHIP_SET_TYPE_MASKED = (1 << 0), + IRQCHIP_EOI_IF_HANDLED = (1 << 1), + IRQCHIP_MASK_ON_SUSPEND = (1 << 2), + IRQCHIP_ONOFFLINE_ENABLED = (1 << 3), + IRQCHIP_SKIP_SET_WAKE = (1 << 4), + IRQCHIP_ONESHOT_SAFE = (1 << 5), + IRQCHIP_EOI_THREADED = (1 << 6), + IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), + IRQCHIP_SUPPORTS_NMI = (1 << 8), + IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), }; #include -- cgit v1.2.3 From c6db31ffe202c3120147e9f3455a4dbc90546d39 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Mon, 5 Oct 2020 18:39:37 +0300 Subject: ethtool: allow netdev driver to define phy tunables Define get/set phy tunable callbacks in ethtool ops. This will allow MAC drivers with integrated PHY still to implement these tunables. Reviewed-by: Andrew Lunn Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 060b20f0b20f..6408b446051f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -505,6 +505,10 @@ struct ethtool_ops { struct ethtool_fecparam *); void (*get_ethtool_phy_stats)(struct net_device *, struct ethtool_stats *, u64 *); + int (*get_phy_tunable)(struct net_device *, + const struct ethtool_tunable *, void *); + int (*set_phy_tunable)(struct net_device *, + const struct ethtool_tunable *, const void *); }; int ethtool_check_ops(const struct ethtool_ops *ops); -- cgit v1.2.3 From 451b05f413d3fd89ee84ae99a5029f619ed3cb78 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 5 Oct 2020 22:34:18 +0200 Subject: net: netdevice.h: sw_netstats_rx_add helper some drivers/network protocols update rx bytes/packets under u64_stats_update_begin/end sequence. Add a specific helper like dev_lstats_add() Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d126e36580c9..a0df43b13839 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2543,6 +2543,16 @@ struct pcpu_lstats { void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes); +static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->rx_bytes += len; + tstats->rx_packets++; + u64_stats_update_end(&tstats->syncp); +} + static inline void dev_lstats_add(struct net_device *dev, unsigned int len) { struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); -- cgit v1.2.3 From 2b0d3d3e4fcfb19d10f9a82910b8f0f05c56ee3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 1 Oct 2020 23:48:41 +0800 Subject: percpu_ref: reduce memory footprint of percpu_ref in fast path 'struct percpu_ref' is often embedded into one user structure, and the instance is usually referenced in fast path, however actually only 'percpu_count_ptr' is needed in fast path. So move other fields into one new structure of 'percpu_ref_data', and allocate it dynamically via kzalloc(), then memory footprint of 'percpu_ref' in fast path is reduced a lot and becomes suitable to put into hot cacheline of user structure. Signed-off-by: Ming Lei Tested-by: Veronika Kabatova Reviewed-by: Christoph Hellwig Acked-by: Tejun Heo Cc: Sagi Grimberg Cc: Tejun Heo Cc: Christoph Hellwig Cc: Jens Axboe Cc: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/percpu-refcount.h | 52 +++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 87d8a38bdea1..16c35a728b4c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -92,18 +92,30 @@ enum { PERCPU_REF_ALLOW_REINIT = 1 << 2, }; -struct percpu_ref { +struct percpu_ref_data { atomic_long_t count; - /* - * The low bit of the pointer indicates whether the ref is in percpu - * mode; if set, then get/put will manipulate the atomic_t. - */ - unsigned long percpu_count_ptr; percpu_ref_func_t *release; percpu_ref_func_t *confirm_switch; bool force_atomic:1; bool allow_reinit:1; struct rcu_head rcu; + struct percpu_ref *ref; +}; + +struct percpu_ref { + /* + * The low bit of the pointer indicates whether the ref is in percpu + * mode; if set, then get/put will manipulate the atomic_t. + */ + unsigned long percpu_count_ptr; + + /* + * 'percpu_ref' is often embedded into user structure, and only + * 'percpu_count_ptr' is required in fast path, move other fields + * into 'percpu_ref_data', so we can reduce memory footprint in + * fast path. + */ + struct percpu_ref_data *data; }; int __must_check percpu_ref_init(struct percpu_ref *ref, @@ -118,6 +130,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); void percpu_ref_resurrect(struct percpu_ref *ref); void percpu_ref_reinit(struct percpu_ref *ref); +bool percpu_ref_is_zero(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref @@ -191,7 +204,7 @@ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) if (__ref_is_percpu(ref, &percpu_count)) this_cpu_add(*percpu_count, nr); else - atomic_long_add(nr, &ref->count); + atomic_long_add(nr, &ref->data->count); rcu_read_unlock(); } @@ -231,7 +244,7 @@ static inline bool percpu_ref_tryget_many(struct percpu_ref *ref, this_cpu_add(*percpu_count, nr); ret = true; } else { - ret = atomic_long_add_unless(&ref->count, nr, 0); + ret = atomic_long_add_unless(&ref->data->count, nr, 0); } rcu_read_unlock(); @@ -279,7 +292,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) this_cpu_inc(*percpu_count); ret = true; } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { - ret = atomic_long_inc_not_zero(&ref->count); + ret = atomic_long_inc_not_zero(&ref->data->count); } rcu_read_unlock(); @@ -305,8 +318,8 @@ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) if (__ref_is_percpu(ref, &percpu_count)) this_cpu_sub(*percpu_count, nr); - else if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) - ref->release(ref); + else if (unlikely(atomic_long_sub_and_test(nr, &ref->data->count))) + ref->data->release(ref); rcu_read_unlock(); } @@ -339,21 +352,4 @@ static inline bool percpu_ref_is_dying(struct percpu_ref *ref) return ref->percpu_count_ptr & __PERCPU_REF_DEAD; } -/** - * percpu_ref_is_zero - test whether a percpu refcount reached zero - * @ref: percpu_ref to test - * - * Returns %true if @ref reached zero. - * - * This function is safe to call as long as @ref is between init and exit. - */ -static inline bool percpu_ref_is_zero(struct percpu_ref *ref) -{ - unsigned long __percpu *percpu_count; - - if (__ref_is_percpu(ref, &percpu_count)) - return false; - return !atomic_long_read(&ref->count); -} - #endif -- cgit v1.2.3 From 0549e87c30ae0d43d7e10424f2222bcd8bbe986d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 1 Oct 2020 23:48:42 +0800 Subject: block: move 'q_usage_counter' into front of 'request_queue' The field of 'q_usage_counter' is always fetched in fast path of every block driver, and move it into front of 'request_queue', so it can be fetched into 1st cacheline of 'request_queue' instance. Signed-off-by: Ming Lei Tested-by: Veronika Kabatova Reviewed-by: Christoph Hellwig Cc: Sagi Grimberg Cc: Tejun Heo Cc: Christoph Hellwig Cc: Jens Axboe Cc: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cf80e61b4c5e..5fc940423e5a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -397,6 +397,8 @@ struct request_queue { struct request *last_merge; struct elevator_queue *elevator; + struct percpu_ref q_usage_counter; + struct blk_queue_stats *stats; struct rq_qos *rq_qos; @@ -569,7 +571,6 @@ struct request_queue { * percpu_ref_kill() and percpu_ref_reinit(). */ struct mutex mq_freeze_lock; - struct percpu_ref q_usage_counter; struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; -- cgit v1.2.3 From 92cf2fd156b273879198bb1d7e58851f822c481f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Oct 2020 09:07:17 +0200 Subject: block: remove the unused blk_integrity_merge_rq export Also move the definition from the public blkdev.h to the private block/blk.h header. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5fc940423e5a..6a6d9ea7420a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1664,8 +1664,6 @@ extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); -extern bool blk_integrity_merge_rq(struct request_queue *, struct request *, - struct request *); extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, struct bio *); @@ -1795,12 +1793,6 @@ static inline unsigned short queue_max_integrity_segments(const struct request_q { return 0; } -static inline bool blk_integrity_merge_rq(struct request_queue *rq, - struct request *r1, - struct request *r2) -{ - return true; -} static inline bool blk_integrity_merge_bio(struct request_queue *rq, struct request *r, struct bio *b) -- cgit v1.2.3 From d59da41998bc794441d7c039a059ed6eb0c2dc4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Oct 2020 09:07:18 +0200 Subject: block: remove the unused blk_integrity_merge_bio export Also move the definition from the public blkdev.h to the private block/blk.h header. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a6d9ea7420a..cda786fa4341 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1664,8 +1664,6 @@ extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); -extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, - struct bio *); static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { @@ -1793,12 +1791,6 @@ static inline unsigned short queue_max_integrity_segments(const struct request_q { return 0; } -static inline bool blk_integrity_merge_bio(struct request_queue *rq, - struct request *r, - struct bio *b) -{ - return true; -} static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, unsigned int sectors) -- cgit v1.2.3 From f55a52bb2cdbc5a92ca209d0ada90a490a188f58 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sat, 3 Oct 2020 00:41:46 +0900 Subject: can: dev: fix type of get_can_dlc() and get_canfd_dlc() macros The macros get_can_dlc() and get_canfd_dlc() are not visible in userland. As such, type u8 should be preferred over type __u8. Reference: https://lkml.org/lkml/2020/10/1/708 Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20201002154219.4887-3-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index ed0482b2f4b2..f8975d0b90bb 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -84,13 +84,13 @@ struct can_priv { /* * get_can_dlc(value) - helper macro to cast a given data length code (dlc) - * to __u8 and ensure the dlc value to be max. 8 bytes. + * to u8 and ensure the dlc value to be max. 8 bytes. * * To be used in the CAN netdriver receive path to ensure conformance with * ISO 11898-1 Chapter 8.4.2.3 (DLC field) */ -#define get_can_dlc(i) (min_t(__u8, (i), CAN_MAX_DLC)) -#define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC)) +#define get_can_dlc(i) (min_t(u8, (i), CAN_MAX_DLC)) +#define get_canfd_dlc(i) (min_t(u8, (i), CANFD_MAX_DLC)) /* Check for outgoing skbs that have not been created by the CAN subsystem */ static inline bool can_skb_headroom_valid(struct net_device *dev, -- cgit v1.2.3 From 6fcd669514794da08ce6bfa272b6ec9b33cb543d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Aug 2020 09:56:58 +0200 Subject: block: optimize blk_queue_zoned_model for !CONFIG_BLK_DEV_ZONED Always return BLK_ZONED_NONE if zoned device support is not enabled. This allows various compiler optimizations including the dead code elimination that we so like for avoiding ifdefs. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8e77f12de522..1b81b2766858 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -692,7 +692,9 @@ static inline bool queue_is_mq(struct request_queue *q) static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { - return q->limits.zoned; + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return q->limits.zoned; + return BLK_ZONED_NONE; } static inline bool blk_queue_is_zoned(struct request_queue *q) -- cgit v1.2.3 From 200da27ab32d89ca43bd8ef010f84b205e5c780a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 6 Oct 2020 13:11:15 -0700 Subject: LSM: Fix type of id parameter in kernel_post_load_data prototype Clang warns: security/security.c:1716:59: warning: implicit conversion from enumeration type 'enum kernel_load_data_id' to different enumeration type 'enum kernel_read_file_id' [-Wenum-conversion] ret = call_int_hook(kernel_post_load_data, 0, buf, size, id, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~ security/security.c:715:22: note: expanded from macro 'call_int_hook' RC = P->hook.FUNC(__VA_ARGS__); \ ~ ^~~~~~~~~~~ 1 warning generated. There is a mismatch between the id parameter type in security_kernel_post_load_data and the function pointer prototype that is created by the LSM_HOOK macro in the security_list_options union. Fix the type in the LSM_HOOK macro as 'enum kernel_load_data_id' is what is expected. Fixes: b64fcae74b6d ("LSM: Introduce kernel_post_load_data() hook") Acked-by: Kees Cook Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201006201115.716550-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index d67cb3502310..32a940117e7a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -186,7 +186,7 @@ LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id id, bool contents) LSM_HOOK(int, 0, kernel_post_load_data, char *buf, loff_t size, - enum kernel_read_file_id id, char *description) + enum kernel_load_data_id id, char *description) LSM_HOOK(int, 0, kernel_read_file, struct file *file, enum kernel_read_file_id id, bool contents) LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf, -- cgit v1.2.3 From 849facea92fa68d9292f9b06d7c4ee9e7a06b8dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Oct 2020 11:04:08 +0200 Subject: dma-direct: simplify the DMA_ATTR_NO_KERNEL_MAPPING handling Use and entirely separate code path for the DMA_ATTR_NO_KERNEL_MAPPING path. This avoids any confusion about the ret type, and avoids lots of attr checks and helpers that can be significantly simplified now. It also ensures that common handling is applied to architetures still using the arch alloc/free hooks. Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 33c6e24707a9..8029f7e04145 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -219,19 +219,6 @@ static inline bool dev_is_dma_coherent(struct device *dev) } #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ -/* - * Check if an allocation needs to be marked uncached to be coherent. - */ -static __always_inline bool dma_alloc_need_uncached(struct device *dev, - unsigned long attrs) -{ - if (dev_is_dma_coherent(dev)) - return false; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) - return false; - return true; -} - void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, -- cgit v1.2.3 From c0ab7ffce275d3f83bd253c70889c28821d4a41d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 6 Oct 2020 14:09:09 -0700 Subject: x86/mce: Recover from poison found while copying from user space Existing kernel code can only recover from a machine check on code that is tagged in the exception table with a fault handling recovery path. Add two new fields in the task structure to pass information from machine check handler to the "task_work" that is queued to run before the task returns to user mode: + mce_vaddr: will be initialized to the user virtual address of the fault in the case where the fault occurred in the kernel copying data from a user address. This is so that kill_me_maybe() can provide that information to the user SIGBUS handler. + mce_kflags: copy of the struct mce.kflags needed by kill_me_maybe() to determine if mce_vaddr is applicable to this error. Add code to recover from a machine check while copying data from user space to the kernel. Action for this case is the same as if the user touched the poison directly; unmap the page and send a SIGBUS to the task. Use a new helper function to share common code between the "fault in user mode" case and the "fault while copying from user" case. New code paths will be activated by the next patch which sets MCE_IN_KERNEL_COPYIN. Suggested-by: Borislav Petkov Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201006210910.21062-6-tony.luck@intel.com --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 93ecd930efd3..2cbba3e2b150 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1308,6 +1308,8 @@ struct task_struct { #endif #ifdef CONFIG_X86_MCE + void __user *mce_vaddr; + __u64 mce_kflags; u64 mce_addr; __u64 mce_ripv : 1, mce_whole_page : 1, -- cgit v1.2.3 From c33fe275b530f1ce1ab99923d50eb399f53ed545 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Thu, 24 Sep 2020 11:39:08 -0500 Subject: fs: remove no longer used dio_end_io() Since we removed the last user of dio_end_io() when btrfs got converted to iomap infrastructure ("btrfs: switch to iomap for direct IO"), remove the helper function dio_end_io(). Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Reviewed-by: Josef Bacik Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..8e2842a9c0b3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3079,8 +3079,6 @@ enum { DIO_SKIP_HOLES = 0x02, }; -void dio_end_io(struct bio *bio); - ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, get_block_t get_block, -- cgit v1.2.3 From dcb5cdf60a1fbbdb3b4dd2abc562206481f09ef1 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 3 Oct 2020 13:19:42 +0530 Subject: powerpc/perf/hv-gpci: Add cpu hotplug support Patch here adds cpu hotplug functions to hv_gpci pmu. A new cpuhp_state "CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE" enum is added. The online callback function updates the cpumask only if its empty. As the primary intention of adding hotplug support is to designate a CPU to make HCALL to collect the counter data. The offline function test and clear corresponding cpu in a cpumask and update cpumask to any other active cpu. Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201003074943.338618-4-kjain@linux.ibm.com --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 3215023d4852..5d08ed922510 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -183,6 +183,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, + CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, -- cgit v1.2.3 From dd841a749d1ded8e2e5facc4242ee0b6779fc0cb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 14 Jun 2020 06:07:10 -0400 Subject: radix tree test suite: Fix compilation Introducing local_lock broke compilation; fix it all up. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/radix-tree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c2a9f7c90727..5c85059a92ba 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From fe6f0cdc49263ae61cd3d33399662808c2398e86 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Oct 2020 21:35:08 +0900 Subject: block: soft limit zone-append sectors as well Martin rightfully noted that for normal filesystem IO we have soft limits in place, to prevent them from getting too big and not lead to unpredictable latencies. For zone append we only have the hardware limit in place. Cap the max sectors we submit via zone-append to the maximal number of sectors if the second limit is lower. Reported-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/linux-btrfs/yq1k0w8g3rw.fsf@ca-mkp.ca.oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cda786fa4341..6bd667c34777 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1407,7 +1407,10 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q) { - return q->limits.max_zone_append_sectors; + + const struct queue_limits *l = &q->limits; + + return min(l->max_zone_append_sectors, l->max_sectors); } static inline unsigned queue_logical_block_size(const struct request_queue *q) -- cgit v1.2.3 From 24a1877286822293684ef3f7bada4ea48a6e129e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 24 Sep 2020 17:48:51 +0200 Subject: locking/seqlock: Tweak DEFINE_SEQLOCK() kernel doc ctags creates a warning: |ctags: Warning: include/linux/seqlock.h:738: null expansion of name pattern "\2" The DEFINE_SEQLOCK() macro is passed to ctags and being told to expect an argument. Add a dummy argument to keep ctags quiet. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20200924154851.skmswuyj322yuz4g@linutronix.de --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 76e44e6c0100..ac5b07f558b0 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -821,7 +821,7 @@ typedef struct { } while (0) /** - * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t + * DEFINE_SEQLOCK(sl) - Define a statically allocated seqlock_t * @sl: Name of the seqlock_t instance */ #define DEFINE_SEQLOCK(sl) \ -- cgit v1.2.3 From cf1f08cac375630af6b6307907a3fc20fcf847c7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 17 Apr 2020 11:00:24 -0400 Subject: SUNRPC: Implement a xdr_page_pos() function I'll need this for READ_PLUS to help figure out the offset where page data is stored at, but it might also be useful for other things. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5a6a81b7cd9f..25a68dd87ecf 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,7 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); +extern unsigned int xdr_page_pos(const struct xdr_stream *xdr); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, -- cgit v1.2.3 From c567552612ece787b178e3b147b5854ad422a836 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 28 May 2014 13:41:22 -0400 Subject: NFS: Add READ_PLUS data segment support This patch adds client support for decoding a single NFS4_CONTENT_DATA segment returned by the server. This is the simplest implementation possible, since it does not account for any hole segments in the reply. Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 2 +- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index b8360be141da..9dc7eeac924f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -551,13 +551,13 @@ enum { NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, - NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_GETXATTR, NFSPROC4_CLNT_SETXATTR, NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, + NFSPROC4_CLNT_READ_PLUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7eae72a8762e..38e60ec742df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -287,5 +287,6 @@ struct nfs_server { #define NFS_CAP_LAYOUTERROR (1U << 26) #define NFS_CAP_COPY_NOTIFY (1U << 27) #define NFS_CAP_XATTR (1U << 28) +#define NFS_CAP_READ_PLUS (1U << 29) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0599efd57eb9..d63cb862d58e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -657,7 +657,7 @@ struct nfs_pgio_args { struct nfs_pgio_res { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - __u32 count; + __u64 count; __u32 op_status; union { struct { -- cgit v1.2.3 From 84ce182ab85b8ad5002fb1125ba572df99dd0d1c Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 28 May 2014 13:38:53 -0400 Subject: SUNRPC: Add the ability to expand holes in data pages This patch adds the ability to "read a hole" into a set of XDR data pages by taking the following steps: 1) Shift all data after the current xdr->p to the right, possibly into the tail, 2) Zero the specified range, and 3) Update xdr->p to point beyond the hole. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 25a68dd87ecf..f9636d2a6d54 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -250,6 +250,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** * xdr_stream_remaining - Return the number of bytes remaining in the stream -- cgit v1.2.3 From e6ac0accb27c6892b7ebc7799e7ce56b3390a678 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 21 Apr 2020 11:27:00 -0400 Subject: SUNRPC: Add an xdr_align_data() function For now, this function simply aligns the data at the beginning of the pages. This can eventually be expanded to shift data to the correct offsets when we're ready. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f9636d2a6d54..fe7ff7f5b584 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -250,6 +250,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** -- cgit v1.2.3 From 1c47fa6b31c2683f03bc2f9174902bb7dcd35d83 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sat, 3 Oct 2020 00:41:49 +0900 Subject: can: dev: add a helper function to calculate the duration of one bit Rename macro CAN_CALC_SYNC_SEG to CAN_SYNC_SEG and make it available through include/linux/can/dev.h Add an helper function can_bit_time() which returns the duration (in time quanta) of one CAN bit. Rationale for this patch: the sync segment and the bit time are two concepts which are defined in the CAN ISO standard. Device drivers for CAN might need those. Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for additional information. Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20201002154219.4887-6-mailhol.vincent@wanadoo.fr [mkl: Let can_bit_time() return an unsinged int, make argument const] Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index f8975d0b90bb..41ff31795320 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -82,6 +82,21 @@ struct can_priv { #endif }; +#define CAN_SYNC_SEG 1 + +/* + * can_bit_time() - Duration of one bit + * + * Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for + * additional information. + * + * Return: the number of time quanta in one bit. + */ +static inline unsigned int can_bit_time(const struct can_bittiming *bt) +{ + return CAN_SYNC_SEG + bt->prop_seg + bt->phase_seg1 + bt->phase_seg2; +} + /* * get_can_dlc(value) - helper macro to cast a given data length code (dlc) * to u8 and ensure the dlc value to be max. 8 bytes. -- cgit v1.2.3 From 638eae9bc7eb1012d1e0f5a8fd4db46447f822e9 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 23 Sep 2020 20:20:51 +0300 Subject: platform_data/mlxreg: Update module license Update license to SPDX-License. Signed-off-by: Vadim Pasternak Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20200923172053.26296-4-vadimp@nvidia.com Signed-off-by: Hans de Goede --- include/linux/platform_data/mlxreg.h | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 1af9c01563f9..0a727d405a7a 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -1,34 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ /* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Vadim Pasternak - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Copyright (C) 2017-2020 Mellanox Technologies Ltd. */ #ifndef __LINUX_PLATFORM_DATA_MLXREG_H -- cgit v1.2.3 From d2f3ab5b6b05f67817f3bd3f2fda5f0126e95a62 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 23 Sep 2020 20:20:52 +0300 Subject: platform_data/mlxreg: Extend core platform structure Add 'capability' field to structure 'mlxreg_core_platform_data'. The purpose of this filed to indicate the actual number of the components within the particular group. Such components could be, for example the number of the FAN drawers. Some systems are equipped with FAN drawers with one tachometer inside, others with FAN drawers with several tachometers inside. Signed-off-by: Vadim Pasternak Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20200923172053.26296-5-vadimp@nvidia.com Signed-off-by: Hans de Goede --- include/linux/platform_data/mlxreg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 0a727d405a7a..101333fe2b8d 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -109,6 +109,7 @@ struct mlxreg_core_item { * @features: supported features of device; * @version: implementation version; * @identity: device identity name; + * @capability: device capability register; */ struct mlxreg_core_platform_data { struct mlxreg_core_data *data; @@ -117,6 +118,7 @@ struct mlxreg_core_platform_data { u32 features; u32 version; char identity[MLXREG_CORE_LABEL_MAX_SIZE]; + u32 capability; }; /** -- cgit v1.2.3 From 9c37de297f6590937f95a28bec1b7ac68a38618f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 7 Oct 2020 15:15:08 -0400 Subject: dm: remove special-casing of bio-based immutable singleton target on NVMe Since commit 5a6c35f9af416 ("block: remove direct_make_request") there is no benefit to DM special-casing NVMe. Remove all code used to establish DM_TYPE_NVME_BIO_BASED. Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d6f8d4ba8d48..61a66fb8ebb3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -29,7 +29,6 @@ enum dm_queue_mode { DM_TYPE_BIO_BASED = 1, DM_TYPE_REQUEST_BASED = 2, DM_TYPE_DAX_BIO_BASED = 3, - DM_TYPE_NVME_BIO_BASED = 4, }; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; -- cgit v1.2.3 From 8dc4bd073663fa8aba2fae08b1c23ab41a2e97a2 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 7 Oct 2020 23:15:47 -0700 Subject: usb: typec: tcpm: Add support for Sink Fast Role SWAP(FRS) PD 3.0 spec defines a new mechanism for power role swap called Fast role swap. This change enables TCPM to support FRS when acting as sink. Once the explicit contract is negotiated, sink port is expected to query the source port for sink caps to determine whether the source is FRS capable. Bits 23 & 24 of fixed pdo of the sink caps from the source, when set, indicates the current needed by the source when fast role swap is in progress(Implicit contract phasae). 0 indicates that the source does not support Fast Role Swap. Upon receiving the FRS signal from the source, TCPC(TCPM_FRS_EVENT) informs TCPM to start the Fast role swap sequence. 1. TCPM sends FRS PD message: FR_SWAP_SEND 2. If response is not received within the expiry of SenderResponseTimer, Error recovery is triggered.: FR_SWAP_SEND_TIMEOUT 3. Upon receipt of the accept message, TCPM waits for PSSourceOffTimer for PS_READY message from the partner: FR_SWAP_SNK_SRC_NEW_SINK_READY. TCPC is expected to autonomously turn on vbus once the FRS signal is received and vbus voltage falls below vsafe5v within tSrcFrSwap. This is different from traditional power role swap where the vbus sourcing is turned on by TCPM. 4. By this time, TCPC most likely would have started to source vbus, TCPM waits for tSrcFrSwap to see if the lower level TCPC driver signals TCPM_SOURCING_VBUS event: FR_SWAP_SNK_SRC_SOURCE_VBUS_APPLIED. 5. When TCPC signals sourcing vbus, TCPM sends PS_READY msg and changes the CC pin from Rd to Rp. This is the end of fast role swap sequence and TCPM initiates the sequnce to negotiate explicit contract by transitioning into SRC_STARTUP after SwapSrcStart. The code is written based on the sequence described in "Figure 8-107: Dual-role Port in Sink to Source Fast Role Swap State Diagram" of USB Power Delivery Specification Revision 3.0, Version 1.2. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201008061556.1402293-7-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 19 +++++++++++-------- include/linux/usb/tcpm.h | 8 +++++++- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index f842e4589bd2..3a805e2ecbc9 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -219,14 +219,16 @@ enum pd_pdo_type { #define PDO_CURR_MASK 0x3ff #define PDO_PWR_MASK 0x3ff -#define PDO_FIXED_DUAL_ROLE BIT(29) /* Power role swap supported */ -#define PDO_FIXED_SUSPEND BIT(28) /* USB Suspend supported (Source) */ -#define PDO_FIXED_HIGHER_CAP BIT(28) /* Requires more than vSafe5V (Sink) */ -#define PDO_FIXED_EXTPOWER BIT(27) /* Externally powered */ -#define PDO_FIXED_USB_COMM BIT(26) /* USB communications capable */ -#define PDO_FIXED_DATA_SWAP BIT(25) /* Data role swap supported */ -#define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ -#define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ +#define PDO_FIXED_DUAL_ROLE BIT(29) /* Power role swap supported */ +#define PDO_FIXED_SUSPEND BIT(28) /* USB Suspend supported (Source) */ +#define PDO_FIXED_HIGHER_CAP BIT(28) /* Requires more than vSafe5V (Sink) */ +#define PDO_FIXED_EXTPOWER BIT(27) /* Externally powered */ +#define PDO_FIXED_USB_COMM BIT(26) /* USB communications capable */ +#define PDO_FIXED_DATA_SWAP BIT(25) /* Data role swap supported */ +#define PDO_FIXED_FRS_CURR_MASK (BIT(24) | BIT(23)) /* FR_Swap Current (Sink) */ +#define PDO_FIXED_FRS_CURR_SHIFT 23 +#define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ +#define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ #define PDO_FIXED_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_FIXED_VOLT_SHIFT) #define PDO_FIXED_CURR(ma) ((((ma) / 10) & PDO_CURR_MASK) << PDO_FIXED_CURR_SHIFT) @@ -454,6 +456,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_DB_DETECT 10000 /* 10 - 15 seconds */ #define PD_T_SEND_SOURCE_CAP 150 /* 100 - 200 ms */ #define PD_T_SENDER_RESPONSE 60 /* 24 - 30 ms, relaxed */ +#define PD_T_RECEIVER_RESPONSE 15 /* 15ms max */ #define PD_T_SOURCE_ACTIVITY 45 #define PD_T_SINK_ACTIVITY 135 #define PD_T_SINK_WAIT_CAP 240 diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 89f58760cf48..09762d26fa0c 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -78,8 +78,11 @@ enum tcpm_transmit_type { * automatically if a connection is established. * @try_role: Optional; called to set a preferred role * @pd_transmit:Called to transmit PD message - * @mux: Pointer to multiplexer data * @set_bist_data: Turn on/off bist data mode for compliance testing + * @enable_frs: + * Optional; Called to enable/disable PD 3.0 fast role swap. + * Enabling frs is accessory dependent as not all PD3.0 + * accessories support fast role swap. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -105,6 +108,7 @@ struct tcpc_dev { int (*pd_transmit)(struct tcpc_dev *dev, enum tcpm_transmit_type type, const struct pd_message *msg); int (*set_bist_data)(struct tcpc_dev *dev, bool on); + int (*enable_frs)(struct tcpc_dev *dev, bool enable); }; struct tcpm_port; @@ -114,6 +118,8 @@ void tcpm_unregister_port(struct tcpm_port *port); void tcpm_vbus_change(struct tcpm_port *port); void tcpm_cc_change(struct tcpm_port *port); +void tcpm_sink_frs(struct tcpm_port *port); +void tcpm_sourcing_vbus(struct tcpm_port *port); void tcpm_pd_receive(struct tcpm_port *port, const struct pd_message *msg); void tcpm_pd_transmit_complete(struct tcpm_port *port, -- cgit v1.2.3 From 8446466c9dd645da4c1848f35ffd0fc1df3524ee Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 Aug 2020 10:07:24 -0400 Subject: XArray: Fix xas_for_each_conflict documentation At one point, xas_for_each_conflict() was going to work this way, and I forgot to update the documentation when I changed my mind. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index b4d70e7568b2..6b336098fca7 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1714,13 +1714,12 @@ enum { * @xas: XArray operation state. * @entry: Entry retrieved from the array. * - * The loop body will be executed for each entry in the XArray that lies - * within the range specified by @xas. If the loop completes successfully, - * any entries that lie in this range will be replaced by @entry. The caller - * may break out of the loop; if they do so, the contents of the XArray will - * be unchanged. The operation may fail due to an out of memory condition. - * The caller may also call xa_set_err() to exit the loop while setting an - * error to record the reason. + * The loop body will be executed for each entry in the XArray that + * lies within the range specified by @xas. If the loop terminates + * normally, @entry will be %NULL. The user may break out of the loop, + * which will leave @entry set to the conflicting entry. The caller + * may also call xa_set_err() to exit the loop while setting an error + * to record the reason. */ #define xas_for_each_conflict(xas, entry) \ while ((entry = xas_find_conflict(xas))) -- cgit v1.2.3 From a20b7053b5c47cd7de23288238ea4d23502f300a Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 24 Sep 2020 13:30:15 +0100 Subject: cpufreq,arm,arm64: restructure definitions of arch_set_freq_scale() Compared to other arch_* functions, arch_set_freq_scale() has an atypical weak definition that can be replaced by a strong architecture specific implementation. The more typical support for architectural functions involves defining an empty stub in a header file if the symbol is not already defined in architecture code. Some examples involve: - #define arch_scale_freq_capacity topology_get_freq_scale - #define arch_scale_freq_invariant topology_scale_freq_invariant - #define arch_scale_cpu_capacity topology_get_cpu_scale - #define arch_update_cpu_topology topology_update_cpu_topology - #define arch_scale_thermal_pressure topology_get_thermal_pressure - #define arch_set_thermal_pressure topology_set_thermal_pressure Bring arch_set_freq_scale() in line with these functions by renaming it to topology_set_freq_scale() in the arch topology driver, and by defining the arch_set_freq_scale symbol to point to the new function for arm and arm64. While there are other users of the arch_topology driver, this patch defines arch_set_freq_scale for arm and arm64 only, due to their existing definitions of arch_scale_freq_capacity. This is the getter function of the frequency invariance scale factor and without a getter function, the setter function - arch_set_freq_scale() has not purpose. Signed-off-by: Ionela Voinescu Acked-by: Viresh Kumar Acked-by: Catalin Marinas Acked-by: Sudeep Holla (BL_SWITCHER and topology parts) Signed-off-by: Rafael J. Wysocki --- include/linux/arch_topology.h | 2 ++ include/linux/cpufreq.h | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 083df331a3c9..0f6cd6b73a61 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -30,6 +30,8 @@ static inline unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } +void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, + unsigned long max_freq); bool topology_scale_freq_invariant(void); bool arch_freq_counters_available(const struct cpumask *cpus); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9f779fbdbe7b..fa37b1c66443 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1011,9 +1011,14 @@ static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, extern void arch_freq_prepare_all(void); extern unsigned int arch_freq_get_on_cpu(int cpu); -extern void arch_set_freq_scale(const struct cpumask *cpus, - unsigned long cur_freq, - unsigned long max_freq); +#ifndef arch_set_freq_scale +static __always_inline +void arch_set_freq_scale(const struct cpumask *cpus, + unsigned long cur_freq, + unsigned long max_freq) +{ +} +#endif /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; -- cgit v1.2.3 From 2ef0342530b0f487a65b5f6c94c85dce5f9b53b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Oct 2020 08:53:03 +0200 Subject: PNP: remove the now unused pnp_find_card() function All user of the pnp_find_card() compat wrapper are gone, so remove the function as well. Signed-off-by: Christoph Hellwig Signed-off-by: Rafael J. Wysocki --- include/linux/isapnp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/isapnp.h b/include/linux/isapnp.h index 11edb2109a68..dba18c95844b 100644 --- a/include/linux/isapnp.h +++ b/include/linux/isapnp.h @@ -75,9 +75,6 @@ static inline int isapnp_proc_done(void) { return 0; } #endif /* compat */ -struct pnp_card *pnp_find_card(unsigned short vendor, - unsigned short device, - struct pnp_card *from); struct pnp_dev *pnp_find_dev(struct pnp_card *card, unsigned short vendor, unsigned short function, @@ -92,9 +89,6 @@ static inline int isapnp_cfg_end(void) { return -ENODEV; } static inline unsigned char isapnp_read_byte(unsigned char idx) { return 0xff; } static inline void isapnp_write_byte(unsigned char idx, unsigned char val) { ; } -static inline struct pnp_card *pnp_find_card(unsigned short vendor, - unsigned short device, - struct pnp_card *from) { return NULL; } static inline struct pnp_dev *pnp_find_dev(struct pnp_card *card, unsigned short vendor, unsigned short function, -- cgit v1.2.3 From 02dae28f0b542969e44cbc1e14ffc9944cd2975c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 31 Aug 2020 11:11:01 +0800 Subject: ftrace: Simplify the dyn_ftrace->flags macro All the three macro are defined to be used for ftrace_rec_count(). This can be achieved by (flags & FTRACE_REF_MAX) directly. Since no other places would use those macros, remove them for clarity. Also it fixes a typo in the comment. Link: https://lkml.kernel.org/r/20200831031104.23322-4-richard.weiyang@linux.alibaba.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e5c2d5cc6e6a..b1f56e3410dc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -432,7 +432,7 @@ bool is_ftrace_trampoline(unsigned long addr); * DIRECT - there is a direct function to call * * When a new ftrace_ops is registered and wants a function to save - * pt_regs, the rec->flag REGS is set. When the function has been + * pt_regs, the rec->flags REGS is set. When the function has been * set up to save regs, the REG_EN flag is set. Once a function * starts saving regs it will do so until all ftrace_ops are removed * from tracing that function. @@ -450,12 +450,9 @@ enum { }; #define FTRACE_REF_MAX_SHIFT 23 -#define FTRACE_FL_BITS 9 -#define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) -#define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) -#define ftrace_rec_count(rec) ((rec)->flags & ~FTRACE_FL_MASK) +#define ftrace_rec_count(rec) ((rec)->flags & FTRACE_REF_MAX) struct dyn_ftrace { unsigned long ip; /* address of mcount call-site */ -- cgit v1.2.3 From 40dc4a42b97ef5a52ef34a73093a7992faaab15e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 31 Aug 2020 11:11:04 +0800 Subject: ftrace: ftrace_global_list is renamed to ftrace_ops_list Fix the comment to comply with the code. Link: https://lkml.kernel.org/r/20200831031104.23322-7-richard.weiyang@linux.alibaba.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b1f56e3410dc..1bd3a0356ae4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -217,11 +217,11 @@ extern struct ftrace_ops __rcu *ftrace_ops_list; extern struct ftrace_ops ftrace_list_end; /* - * Traverse the ftrace_global_list, invoking all entries. The reason that we + * Traverse the ftrace_ops_list, invoking all entries. The reason that we * can use rcu_dereference_raw_check() is that elements removed from this list * are simply leaked, so there is no need to interact with a grace-period * mechanism. The rcu_dereference_raw_check() calls are needed to handle - * concurrent insertions into the ftrace_global_list. + * concurrent insertions into the ftrace_ops_list. * * Silly Alpha and silly pointer-speculation compiler optimizations! */ -- cgit v1.2.3 From 2bb8945bcc1a768f2bc402a16c9610bba8d5187d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Sep 2020 11:49:37 +0200 Subject: lockdep: Fix usage_traceoverflow Basically print_lock_class_header()'s for loop is out of sync with the the size of of ->usage_traces[]. Also clean things up a bit while at it, to avoid such mishaps in the future. Fixes: 23870f122768 ("locking/lockdep: Fix "USED" <- "IN-NMI" inversions") Reported-by: Qian Cai Debugged-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Qian Cai Link: https://lkml.kernel.org/r/20200930094937.GE2651@hirez.programming.kicks-ass.net --- include/linux/lockdep_types.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index bb35b449f533..9a1fd49df17f 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -35,8 +35,12 @@ enum lockdep_wait_type { /* * We'd rather not expose kernel/lockdep_states.h this wide, but we do need * the total number of states... :-( + * + * XXX_LOCK_USAGE_STATES is the number of lines in lockdep_states.h, for each + * of those we generates 4 states, Additionally we report on USED and USED_READ. */ -#define XXX_LOCK_USAGE_STATES (1+2*4) +#define XXX_LOCK_USAGE_STATES 2 +#define LOCK_TRACE_STATES (XXX_LOCK_USAGE_STATES*4 + 2) /* * NR_LOCKDEP_CACHING_CLASSES ... Number of classes @@ -106,7 +110,7 @@ struct lock_class { * IRQ/softirq usage tracking bits: */ unsigned long usage_mask; - const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; + const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; /* * Generation counter, when doing certain classes of graph walking, -- cgit v1.2.3 From 4d004099a668c41522242aa146a38cc4eb59cb1e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 2 Oct 2020 11:04:21 +0200 Subject: lockdep: Fix lockdep recursion Steve reported that lockdep_assert*irq*(), when nested inside lockdep itself, will trigger a false-positive. One example is the stack-trace code, as called from inside lockdep, triggering tracing, which in turn calls RCU, which then uses lockdep_assert_irqs_disabled(). Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables") Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 6a584b3e5c74..b1227be47496 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -534,6 +534,7 @@ do { \ DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); +DECLARE_PER_CPU(unsigned int, lockdep_recursion); /* * The below lockdep_assert_*() macros use raw_cpu_read() to access the above @@ -543,25 +544,27 @@ DECLARE_PER_CPU(int, hardirq_context); * read the value from our previous CPU. */ +#define __lockdep_enabled (debug_locks && !raw_cpu_read(lockdep_recursion)) + #define lockdep_assert_irqs_enabled() \ do { \ - WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_irqs_disabled() \ do { \ - WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_in_irq() \ do { \ - WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context)); \ + WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirq_context)); \ } while (0) #define lockdep_assert_preemption_enabled() \ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ - debug_locks && \ + __lockdep_enabled && \ (preempt_count() != 0 || \ !raw_cpu_read(hardirqs_enabled))); \ } while (0) @@ -569,7 +572,7 @@ do { \ #define lockdep_assert_preemption_disabled() \ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ - debug_locks && \ + __lockdep_enabled && \ (preempt_count() == 0 && \ raw_cpu_read(hardirqs_enabled))); \ } while (0) -- cgit v1.2.3 From baffd723e44dc3d7f84f0b8f1fe1ece00ddd2710 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Oct 2020 09:56:57 +0200 Subject: lockdep: Revert "lockdep: Use raw_cpu_*() for per-cpu variables" The thinking in commit: fddf9055a60d ("lockdep: Use raw_cpu_*() for per-cpu variables") is flawed. While it is true that when we're migratable both CPUs will have a 0 value, it doesn't hold that when we do get migrated in the middle of a raw_cpu_op(), the old CPU will still have 0 by the time we get around to reading it on the new CPU. Luckily, the reason for that commit (s390 using preempt_disable() instead of preempt_disable_notrace() in their percpu code), has since been fixed by commit: 1196f12a2c96 ("s390: don't trace preemption in percpu macros") An audit of arch/*/include/asm/percpu*.h shows there are no other architectures affected by this particular issue. Fixes: fddf9055a60d ("lockdep: Use raw_cpu_*() for per-cpu variables") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201005095958.GJ2651@hirez.programming.kicks-ass.net --- include/linux/lockdep.h | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b1227be47496..1130f271de66 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -512,19 +512,19 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define lock_map_release(l) lock_release(l, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING -# define might_lock(lock) \ +# define might_lock(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) -# define might_lock_read(lock) \ +# define might_lock_read(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) -# define might_lock_nested(lock, subclass) \ +# define might_lock_nested(lock, subclass) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, subclass, 0, 1, 1, NULL, \ @@ -536,29 +536,21 @@ DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); DECLARE_PER_CPU(unsigned int, lockdep_recursion); -/* - * The below lockdep_assert_*() macros use raw_cpu_read() to access the above - * per-cpu variables. This is required because this_cpu_read() will potentially - * call into preempt/irq-disable and that obviously isn't right. This is also - * correct because when IRQs are enabled, it doesn't matter if we accidentally - * read the value from our previous CPU. - */ - -#define __lockdep_enabled (debug_locks && !raw_cpu_read(lockdep_recursion)) +#define __lockdep_enabled (debug_locks && !this_cpu_read(lockdep_recursion)) #define lockdep_assert_irqs_enabled() \ do { \ - WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_irqs_disabled() \ do { \ - WARN_ON_ONCE(__lockdep_enabled && raw_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && this_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_in_irq() \ do { \ - WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirq_context)); \ + WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \ } while (0) #define lockdep_assert_preemption_enabled() \ @@ -566,7 +558,7 @@ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ __lockdep_enabled && \ (preempt_count() != 0 || \ - !raw_cpu_read(hardirqs_enabled))); \ + !this_cpu_read(hardirqs_enabled))); \ } while (0) #define lockdep_assert_preemption_disabled() \ @@ -574,7 +566,7 @@ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ __lockdep_enabled && \ (preempt_count() == 0 && \ - raw_cpu_read(hardirqs_enabled))); \ + this_cpu_read(hardirqs_enabled))); \ } while (0) #else -- cgit v1.2.3 From 38b9f903f22b9baa5c4b9bfb07c8bbc49f5efbba Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 7 Oct 2020 09:00:49 +0300 Subject: net/mlx5: Handle sync reset request event Once the driver gets sync_reset_request from firmware it prepares for the coming reset and sends acknowledge. After getting this event the driver expects device reset, either it will trigger PCI reset on sync_reset_now event or such PCI reset will be triggered by another PF of the same device. So it moves to reset requested mode and if it gets PCI reset triggered by the other PF it detect the reset and reloads. Signed-off-by: Moshe Shemesh Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 239e5348ee09..add85094f9a5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -501,6 +501,7 @@ struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; struct mlx5_devcom; +struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; @@ -578,6 +579,7 @@ struct mlx5_priv { struct mlx5_core_sriov sriov; struct mlx5_lag *lag; struct mlx5_devcom *devcom; + struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; -- cgit v1.2.3 From 2d69356752ff862dbb0c7e6725874740799d7708 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 7 Oct 2020 09:00:55 +0300 Subject: net/mlx5: Add support for fw live patch event Firmware live patch event notifies the driver that the firmware was just updated using live patch. In such case the driver should not reload or re-initiate entities, part to updating the firmware version and re-initiate the firmware tracer which can be updated by live patch with new strings database to help debugging an issue. Signed-off-by: Moshe Shemesh Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 81ca5989009b..cf824366a7d1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,7 @@ enum { enum { MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5, + MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT = 0x7, MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT = 0x8, }; -- cgit v1.2.3 From 44f3625bc61653ea3bde9960298faf2f5518fda5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Oct 2020 12:45:17 +0200 Subject: netlink: export policy in extended ACK Add a new attribute NLMSGERR_ATTR_POLICY to the extended ACK to advertise the policy, e.g. if an attribute was out of range, you'll know the range that's permissible. Add new NL_SET_ERR_MSG_ATTR_POL() and NL_SET_ERR_MSG_ATTR_POL() macros to set this, since realistically it's only useful to do this when the bad attribute (offset) is also returned. Use it in lib/nlattr.c which practically does all the policy validation. v2: - add and use netlink_policy_dump_attr_size_estimate() v3: - remove redundant break v4: - really remove redundant break ... sorry Reviewed-by: Jakub Kicinski Signed-off-by: Johannes Berg Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e3e49f0e5c13..666cd0390699 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -68,12 +68,14 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) * @_msg: message string to report - don't access directly, use * %NL_SET_ERR_MSG * @bad_attr: attribute with error + * @policy: policy for a bad attribute * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length */ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; + const struct nla_policy *policy; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; }; @@ -95,21 +97,29 @@ struct netlink_ext_ack { #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) -#define NL_SET_BAD_ATTR(extack, attr) do { \ - if ((extack)) \ +#define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ + if ((extack)) { \ (extack)->bad_attr = (attr); \ + (extack)->policy = (pol); \ + } \ } while (0) -#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do { \ - static const char __msg[] = msg; \ - struct netlink_ext_ack *__extack = (extack); \ - \ - if (__extack) { \ - __extack->_msg = __msg; \ - __extack->bad_attr = (attr); \ - } \ +#define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL) + +#define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do { \ + static const char __msg[] = msg; \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (__extack) { \ + __extack->_msg = __msg; \ + __extack->bad_attr = (attr); \ + __extack->policy = (pol); \ + } \ } while (0) +#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ + NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) + static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { -- cgit v1.2.3 From 55567976629e58fde28fb70612ca73228271eef2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Oct 2020 10:10:20 +0100 Subject: genirq/irqdomain: Allow partial trimming of irq_data hierarchy It appears that some HW is ugly enough that not all the interrupts connected to a particular interrupt controller end up with the same hierarchy depth (some of them are terminated early). This leaves the irqchip hacker with only two choices, both equally bad: - create discrete domain chains, one for each "hierarchy depth", which is very hard to maintain - create fake hierarchy levels for the shallow paths, leading to all kind of problems (what are the safe hwirq values for these fake levels?) Implement the ability to cut short a single interrupt hierarchy from a level marked as being disconnected by using the new irq_domain_disconnect_hierarchy() helper. The irqdomain allocation code will then perform the trimming Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b37350c4fe37..a52b095bd404 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -509,6 +509,9 @@ extern void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs); +extern int irq_domain_disconnect_hierarchy(struct irq_domain *domain, + unsigned int virq); + static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; -- cgit v1.2.3 From ead1c9f376dbb2805796098ed6d2a70921c77ee5 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 30 Sep 2020 16:50:48 +0300 Subject: iio: adc: at91_adc: remove platform data and move defs in driver file The AT91 ADC driver no longer uses the 'at91_add_device_adc' platform data type. This is no longer used (at least in mainline boards). This change removes the platform-data initialization from the driver, since it is mostly dead code now. Some definitions [from the platform data at91_adc.h include] have been moved in the driver, since they are needed in the driver. Signed-off-by: Alexandru Ardelean Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200930135048.11530-5-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/at91_adc.h | 49 ---------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 include/linux/platform_data/at91_adc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/at91_adc.h b/include/linux/platform_data/at91_adc.h deleted file mode 100644 index f20eaeb827ce..000000000000 --- a/include/linux/platform_data/at91_adc.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2011 Free Electrons - */ - -#ifndef _AT91_ADC_H_ -#define _AT91_ADC_H_ - -enum atmel_adc_ts_type { - ATMEL_ADC_TOUCHSCREEN_NONE = 0, - ATMEL_ADC_TOUCHSCREEN_4WIRE = 4, - ATMEL_ADC_TOUCHSCREEN_5WIRE = 5, -}; - -/** - * struct at91_adc_trigger - description of triggers - * @name: name of the trigger advertised to the user - * @value: value to set in the ADC's trigger setup register - to enable the trigger - * @is_external: Does the trigger rely on an external pin? - */ -struct at91_adc_trigger { - const char *name; - u8 value; - bool is_external; -}; - -/** - * struct at91_adc_data - platform data for ADC driver - * @channels_used: channels in use on the board as a bitmask - * @startup_time: startup time of the ADC in microseconds - * @trigger_list: Triggers available in the ADC - * @trigger_number: Number of triggers available in the ADC - * @use_external_triggers: does the board has external triggers availables - * @vref: Reference voltage for the ADC in millivolts - * @touchscreen_type: If a touchscreen is connected, its type (4 or 5 wires) - */ -struct at91_adc_data { - unsigned long channels_used; - u8 startup_time; - struct at91_adc_trigger *trigger_list; - u8 trigger_number; - bool use_external_triggers; - u16 vref; - enum atmel_adc_ts_type touchscreen_type; -}; - -extern void __init at91_add_device_adc(struct at91_adc_data *data); -#endif -- cgit v1.2.3 From 5483b8d5015bb366c372870cfe4448742082e41f Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Fri, 2 Oct 2020 11:27:23 +0300 Subject: iio: adc: ad7887: invert/rework external ref logic This change inverts/reworks the logic to use an external reference via a provided regulator. Now the driver tries to obtain a regulator. If one is found, then it is used. The rest of the driver logic already checks if there is a non-NULL reference to a regulator, so it should be fine. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201002082723.184810-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7887.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7887.h b/include/linux/platform_data/ad7887.h index 732af46b2d16..9b4dca6ae70b 100644 --- a/include/linux/platform_data/ad7887.h +++ b/include/linux/platform_data/ad7887.h @@ -13,13 +13,9 @@ * second input channel, and Vref is internally connected to Vdd. If set to * false the device is used in single channel mode and AIN1/Vref is used as * VREF input. - * @use_onchip_ref: Whether to use the onchip reference. If set to true the - * internal 2.5V reference is used. If set to false a external reference is - * used. */ struct ad7887_platform_data { bool en_dual; - bool use_onchip_ref; }; #endif /* IIO_ADC_AD7887_H_ */ -- cgit v1.2.3 From 28963f2f6b46d75bda8fed15bd5ce9923427a40d Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 1 Oct 2020 17:10:48 +0300 Subject: iio: adc: ad7298: rework external ref setup & remove platform data This change removes the old platform data for ad7298. It is only used to provide whether to use an external regulator as a reference. So, the logic is inverted a bit. The driver now tries to obtain a regulator. If one is provided, then the external ref is used. The rest of the logic should work as before. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201001141048.69050-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7298.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/platform_data/ad7298.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7298.h b/include/linux/platform_data/ad7298.h deleted file mode 100644 index 3e0ffe2d5d3d..000000000000 --- a/include/linux/platform_data/ad7298.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AD7298 SPI ADC driver - * - * Copyright 2011 Analog Devices Inc. - */ - -#ifndef __LINUX_PLATFORM_DATA_AD7298_H__ -#define __LINUX_PLATFORM_DATA_AD7298_H__ - -/** - * struct ad7298_platform_data - Platform data for the ad7298 ADC driver - * @ext_ref: Whether to use an external reference voltage. - **/ -struct ad7298_platform_data { - bool ext_ref; -}; - -#endif /* IIO_ADC_AD7298_H_ */ -- cgit v1.2.3 From 223f4d9517f8d97721647297b1cde41241a45233 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 1 Oct 2020 17:10:04 +0300 Subject: iio: dac: ad7303: remove platform data header The information in the ad7303 platform_data header is unused, so it's dead code. This change removes it and it's inclusion from the driver. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201001141004.53846-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/ad7303.h | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 include/linux/platform_data/ad7303.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ad7303.h b/include/linux/platform_data/ad7303.h deleted file mode 100644 index c2bd0a13bea1..000000000000 --- a/include/linux/platform_data/ad7303.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Analog Devices AD7303 DAC driver - * - * Copyright 2013 Analog Devices Inc. - */ - -#ifndef __IIO_ADC_AD7303_H__ -#define __IIO_ADC_AD7303_H__ - -/** - * struct ad7303_platform_data - AD7303 platform data - * @use_external_ref: If set to true use an external voltage reference connected - * to the REF pin, otherwise use the internal reference derived from Vdd. - */ -struct ad7303_platform_data { - bool use_external_ref; -}; - -#endif -- cgit v1.2.3 From 9aa1206e8f48222f35a0c809f33b2f4aaa1e2661 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:02 +0200 Subject: bpf: Add redirect_peer helper Add an efficient ingress to ingress netns switch that can be used out of tc BPF programs in order to redirect traffic from host ns ingress into a container veth device ingress without having to go via CPU backlog queue [0]. For local containers this can also be utilized and path via CPU backlog queue only needs to be taken once, not twice. On a high level this borrows from ipvlan which does similar switch in __netif_receive_skb_core() and then iterates via another_round. This helps to reduce latency for mentioned use cases. Pod to remote pod with redirect(), TCP_RR [1]: # percpu_netperf 10.217.1.33 RT_LATENCY: 122.450 (per CPU: 122.666 122.401 122.333 122.401 ) MEAN_LATENCY: 121.210 (per CPU: 121.100 121.260 121.320 121.160 ) STDDEV_LATENCY: 120.040 (per CPU: 119.420 119.910 125.460 115.370 ) MIN_LATENCY: 46.500 (per CPU: 47.000 47.000 47.000 45.000 ) P50_LATENCY: 118.500 (per CPU: 118.000 119.000 118.000 119.000 ) P90_LATENCY: 127.500 (per CPU: 127.000 128.000 127.000 128.000 ) P99_LATENCY: 130.750 (per CPU: 131.000 131.000 129.000 132.000 ) TRANSACTION_RATE: 32666.400 (per CPU: 8152.200 8169.842 8174.439 8169.897 ) Pod to remote pod with redirect_peer(), TCP_RR: # percpu_netperf 10.217.1.33 RT_LATENCY: 44.449 (per CPU: 43.767 43.127 45.279 45.622 ) MEAN_LATENCY: 45.065 (per CPU: 44.030 45.530 45.190 45.510 ) STDDEV_LATENCY: 84.823 (per CPU: 66.770 97.290 84.380 90.850 ) MIN_LATENCY: 33.500 (per CPU: 33.000 33.000 34.000 34.000 ) P50_LATENCY: 43.250 (per CPU: 43.000 43.000 43.000 44.000 ) P90_LATENCY: 46.750 (per CPU: 46.000 47.000 47.000 47.000 ) P99_LATENCY: 52.750 (per CPU: 51.000 54.000 53.000 53.000 ) TRANSACTION_RATE: 90039.500 (per CPU: 22848.186 23187.089 22085.077 21919.130 ) [0] https://linuxplumbersconf.org/event/7/contributions/674/attachments/568/1002/plumbers_2020_cilium_load_balancer.pdf [1] https://github.com/borkmann/netperf_scripts/blob/master/percpu_netperf Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201010234006.7075-3-daniel@iogearbox.net --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 28cfa53daf72..0533f86018dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1277,6 +1277,9 @@ struct netdev_net_notifier { * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. + * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); + * If a device is paired with a peer device, return the peer instance. + * The caller must be under RCU read context. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1484,6 +1487,7 @@ struct net_device_ops { struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); + struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); }; /** -- cgit v1.2.3 From 4a8f87e60f6db40e640f1db555d063b2c4dea5f1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:03 +0200 Subject: bpf: Allow for map-in-map with dynamic inner array map entries Recent work in f4d05259213f ("bpf: Add map_meta_equal map ops") and 134fede4eecf ("bpf: Relax max_entries check for most of the inner map types") added support for dynamic inner max elements for most map-in-map types. Exceptions were maps like array or prog array where the map_gen_lookup() callback uses the maps' max_entries field as a constant when emitting instructions. We recently implemented Maglev consistent hashing into Cilium's load balancer which uses map-in-map with an outer map being hash and inner being array holding the Maglev backend table for each service. This has been designed this way in order to reduce overall memory consumption given the outer hash map allows to avoid preallocating a large, flat memory area for all services. Also, the number of service mappings is not always known a-priori. The use case for dynamic inner array map entries is to further reduce memory overhead, for example, some services might just have a small number of back ends while others could have a large number. Right now the Maglev backend table for small and large number of backends would need to have the same inner array map entries which adds a lot of unneeded overhead. Dynamic inner array map entries can be realized by avoiding the inlined code generation for their lookup. The lookup will still be efficient since it will be calling into array_map_lookup_elem() directly and thus avoiding retpoline. The patch adds a BPF_F_INNER_MAP flag to map creation which therefore skips inline code generation and relaxes array_map_meta_equal() check to ignore both maps' max_entries. This also still allows to have faster lookups for map-in-map when BPF_F_INNER_MAP is not specified and hence dynamic max_entries not needed. Example code generation where inner map is dynamic sized array: # bpftool p d x i 125 int handle__sys_enter(void * ctx): ; int handle__sys_enter(void *ctx) 0: (b4) w1 = 0 ; int key = 0; 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 ; 3: (07) r2 += -4 ; inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key); 4: (18) r1 = map[id:468] 6: (07) r1 += 272 7: (61) r0 = *(u32 *)(r2 +0) 8: (35) if r0 >= 0x3 goto pc+5 9: (67) r0 <<= 3 10: (0f) r0 += r1 11: (79) r0 = *(u64 *)(r0 +0) 12: (15) if r0 == 0x0 goto pc+1 13: (05) goto pc+1 14: (b7) r0 = 0 15: (b4) w6 = -1 ; if (!inner_map) 16: (15) if r0 == 0x0 goto pc+6 17: (bf) r2 = r10 ; 18: (07) r2 += -4 ; val = bpf_map_lookup_elem(inner_map, &key); 19: (bf) r1 = r0 | No inlining but instead 20: (85) call array_map_lookup_elem#149280 | call to array_map_lookup_elem() ; return val ? *val : -1; | for inner array lookup. 21: (15) if r0 == 0x0 goto pc+1 ; return val ? *val : -1; 22: (61) r6 = *(u32 *)(r0 +0) ; } 23: (bc) w0 = w6 24: (95) exit Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201010234006.7075-4-daniel@iogearbox.net --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dc63eeed4fd9..2b16bf48aab6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -82,7 +82,7 @@ struct bpf_map_ops { void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); void (*map_fd_put_ptr)(void *ptr); - u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); + int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); -- cgit v1.2.3 From ef5659280eb13e8ac31c296f58cfdfa1684ac06b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 10 Oct 2020 22:09:38 -0700 Subject: bpf, sockmap: Allow skipping sk_skb parser program Currently, we often run with a nop parser namely one that just does this, 'return skb->len'. This happens when either our verdict program can handle streaming data or it is only looking at socket data such as IP addresses and other metadata associated with the flow. The second case is common for a L3/L4 proxy for instance. So lets allow loading programs without the parser then we can skip the stream parser logic and avoid having to add a BPF program that is effectively a nop. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/160239297866.8495.13345662302749219672.stgit@john-Precision-5820-Tower --- include/linux/skmsg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 3119928fc103..fec0c5ac1c4f 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node); int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); +void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock); +void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg); -- cgit v1.2.3 From f726f3d37163f714034aa5fd1f92a1a73df4297f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Oct 2020 09:43:54 +0200 Subject: can: remove obsolete version strings As pointed out by Jakub Kicinski here: http://lore.kernel.org/r/20201009175751.5c54097f@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com this patch removes the obsolete version information of the different CAN protocols and the AF_CAN core module. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201012074354.25839-2-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/linux/can/core.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 7da9f1f82e8e..5fb8d0e3f9c1 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -18,13 +18,6 @@ #include #include -#define CAN_VERSION "20170425" - -/* increment this number each time you change some user-space interface */ -#define CAN_ABI_VERSION "9" - -#define CAN_VERSION_STRING "rev " CAN_VERSION " abi " CAN_ABI_VERSION - #define DNAME(dev) ((dev) ? (dev)->name : "any") /** -- cgit v1.2.3 From 585834a5eeb38a9b290786b6d3ee55f22098c602 Mon Sep 17 00:00:00 2001 From: zhuguangqing Date: Thu, 17 Sep 2020 15:35:53 +0800 Subject: thermal/idle_inject: Fix comment of idle_duration_us and name of latency_ns The comment of idle_duration_us and the name of latency_ns can be misleading, so fix them. Signed-off-by: zhuguangqing Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200917073553.898-1-zhuguangqing83@gmail.com --- include/linux/idle_inject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index 91a8612b8bf9..fb88e23a99d3 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -28,6 +28,6 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, unsigned int *idle_duration_us); void idle_inject_set_latency(struct idle_inject_device *ii_dev, - unsigned int latency_ns); + unsigned int latency_us); #endif /* __IDLE_INJECT_H__ */ -- cgit v1.2.3 From 88052319620a35f827509d645e4c8063ded751c8 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 15 Sep 2020 15:36:49 -0700 Subject: thermal: core: Add new event for sending keep alive notifications This event is sent by the platform firmware to confirm that user space thermal solution is alive. The response to this event from the user space thermal solution is platform specific. Signed-off-by: Srinivas Pandruvada Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200915223650.406046-3-srinivas.pandruvada@linux.intel.com --- include/linux/thermal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 42ef807e5d84..42b69d4072a4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_DEVICE_UP, /* Thermal device is up after a down event */ THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ THERMAL_TABLE_CHANGED, /* Thermal table(s) changed */ + THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */ }; struct thermal_zone_device_ops { -- cgit v1.2.3 From ecd1d2a3e4f893584578a304d7bd1591e895a8e4 Mon Sep 17 00:00:00 2001 From: zhuguangqing Date: Mon, 14 Sep 2020 15:11:01 +0800 Subject: thermal: cooling: Remove unused variable *tz 1. devfreq_cooling.c: The variable *tz is not used in devfreq_cooling_get_requested_power(), devfreq_cooling_state2power() and devfreq_cooling_power2state(). 2. cpufreq_cooling.c: After 84fe2cab48590, the variable *tz is not used anymore in cpufreq_get_requested_power(), cpufreq_state2power() and cpufreq_power2state(). Remove the variable *tz. Signed-off-by: zhuguangqing Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200914071101.13575-1-zhuguangqing83@gmail.com --- include/linux/thermal.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 42b69d4072a4..d07ea27e72a9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -85,12 +85,9 @@ struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); - int (*get_requested_power)(struct thermal_cooling_device *, - struct thermal_zone_device *, u32 *); - int (*state2power)(struct thermal_cooling_device *, - struct thermal_zone_device *, unsigned long, u32 *); - int (*power2state)(struct thermal_cooling_device *, - struct thermal_zone_device *, u32, unsigned long *); + int (*get_requested_power)(struct thermal_cooling_device *, u32 *); + int (*state2power)(struct thermal_cooling_device *, unsigned long, u32 *); + int (*power2state)(struct thermal_cooling_device *, u32, unsigned long *); }; struct thermal_cooling_device { -- cgit v1.2.3 From 3986f9a42e993075af01c17dc8968cfb96a4fe53 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 17 Aug 2020 13:45:04 +0200 Subject: libceph: multiple workspaces for CRUSH computations Replace a global map->crush_workspace (protected by a global mutex) with a list of workspaces, up to the number of CPUs + 1. This is based on a patch from Robin Geuze . Robin and his team have observed a 10-20% increase in IOPS on all queue depths and lower CPU usage as well on a high-end all-NVMe 100GbE cluster. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 14 ++++++++++++-- include/linux/crush/crush.h | 3 +++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 3f4498fef6ad..cad9acfbc320 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -137,6 +137,17 @@ int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp, const char *fmt, ...); void ceph_oid_destroy(struct ceph_object_id *oid); +struct workspace_manager { + struct list_head idle_ws; + spinlock_t ws_lock; + /* Number of free workspaces */ + int free_ws; + /* Total number of allocated workspaces */ + atomic_t total_ws; + /* Waiters for a free workspace */ + wait_queue_head_t ws_wait; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -184,8 +195,7 @@ struct ceph_osdmap { * the list of osds that store+replicate them. */ struct crush_map *crush; - struct mutex crush_workspace_mutex; - void *crush_workspace; + struct workspace_manager crush_wsm; }; static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 2f811baf78d2..30dba392b730 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -346,6 +346,9 @@ struct crush_work_bucket { struct crush_work { struct crush_work_bucket **work; /* Per-bucket working store */ +#ifdef __KERNEL__ + struct list_head item; +#endif }; #ifdef __KERNEL__ -- cgit v1.2.3 From 0b98acd6188309333c3a8a6e16feadadd31e4523 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2020 13:39:19 +0200 Subject: libceph, rbd, ceph: "blacklist" -> "blocklist" Signed-off-by: Ilya Dryomov --- include/linux/ceph/mon_client.h | 2 +- include/linux/ceph/rados.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index ce4ffeb384d7..b658961156a0 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -142,7 +142,7 @@ int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what, ceph_monc_callback_t cb, u64 private_data); -int ceph_monc_blacklist_add(struct ceph_mon_client *monc, +int ceph_monc_blocklist_add(struct ceph_mon_client *monc, struct ceph_entity_addr *client_addr); extern int ceph_monc_open_session(struct ceph_mon_client *monc); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 3a518fd0eaad..43a7a1573b51 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -424,7 +424,7 @@ enum { }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ -#define EBLACKLISTED ESHUTDOWN /* blacklisted */ +#define EBLOCKLISTED ESHUTDOWN /* blocklisted */ /* xattr comparison */ enum { -- cgit v1.2.3 From b07720d0bd1e7c2251642010efb6075dbee23bb8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Oct 2020 14:38:08 +0200 Subject: libceph: fix ENTITY_NAME format suggestion Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 76371aaae2d1..60b324efd1c4 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -54,7 +54,7 @@ struct ceph_connection_operations { int (*check_message_signature) (struct ceph_msg *msg); }; -/* use format string %s%d */ +/* use format string %s%lld */ #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) struct ceph_messenger { -- cgit v1.2.3 From 476c5818c37a7828d558f34ae01f0c32f8bfadde Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:13 +0900 Subject: llist: Add nonatomic __llist_add() and __llist_dell_all() We'll use these in the new, lockless kretprobes code. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870619318.1229682.13027387548510028721.stgit@devnote2 --- include/linux/llist.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index 2e9c7215882b..24f207b0190b 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -197,6 +197,16 @@ static inline struct llist_node *llist_next(struct llist_node *node) extern bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head); + +static inline bool __llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head) +{ + new_last->next = head->first; + head->first = new_first; + return new_last->next == NULL; +} + /** * llist_add - add a new entry * @new: new entry to be added @@ -209,6 +219,11 @@ static inline bool llist_add(struct llist_node *new, struct llist_head *head) return llist_add_batch(new, new, head); } +static inline bool __llist_add(struct llist_node *new, struct llist_head *head) +{ + return __llist_add_batch(new, new, head); +} + /** * llist_del_all - delete all entries from lock-less list * @head: the head of lock-less list to delete all entries @@ -222,6 +237,14 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) return xchg(&head->first, NULL); } +static inline struct llist_node *__llist_del_all(struct llist_head *head) +{ + struct llist_node *first = head->first; + + head->first = NULL; + return first; +} + extern struct llist_node *llist_del_first(struct llist_head *head); struct llist_node *llist_reverse_order(struct llist_node *head); -- cgit v1.2.3 From d741bf41d7c7db4898bacfcb020353cddc032fd8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:24 +0900 Subject: kprobes: Remove kretprobe hash The kretprobe hash is mostly superfluous, replace it with a per-task variable. This gets rid of the task hash and it's related locking. Note that this may change the kprobes module-exported API for kretprobe handlers. If any out-of-tree kretprobe user uses ri->rp, use get_kretprobe(ri) instead. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870620431.1229682.16325792502413731312.stgit@devnote2 --- include/linux/kprobes.h | 19 +++++++++++++++++-- include/linux/sched.h | 4 ++++ 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5c8c271fa1e9..00cf4421efd5 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifdef CONFIG_KPROBES @@ -144,6 +145,11 @@ static inline int kprobe_ftrace(struct kprobe *p) * ignored, due to maxactive being too low. * */ +struct kretprobe_holder { + struct kretprobe *rp; + refcount_t ref; +}; + struct kretprobe { struct kprobe kp; kretprobe_handler_t handler; @@ -152,17 +158,18 @@ struct kretprobe { int nmissed; size_t data_size; struct hlist_head free_instances; + struct kretprobe_holder *rph; raw_spinlock_t lock; }; struct kretprobe_instance { union { + struct llist_node llist; struct hlist_node hlist; struct rcu_head rcu; }; - struct kretprobe *rp; + struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; - struct task_struct *task; void *fp; char data[]; }; @@ -221,6 +228,14 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, return ret; } +static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), + "Kretprobe is accessed from instance under preemptive context"); + + return READ_ONCE(ri->rph->rp); +} + #else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) diff --git a/include/linux/sched.h b/include/linux/sched.h index afe01e232935..5911805cafde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1315,6 +1315,10 @@ struct task_struct { struct callback_head mce_kill_me; #endif +#ifdef CONFIG_KRETPROBES + struct llist_head kretprobe_instances; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. -- cgit v1.2.3 From 29f006fdefe6f88abde973a0b0f20d2704e93fd4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:35 +0900 Subject: asm-generic/atomic: Add try_cmpxchg() fallbacks Only x86 provides try_cmpxchg() outside of the atomic_t interfaces, provide generic fallbacks to create this interface from the widely available cmpxchg() function. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Acked-by: Will Deacon Link: https://lore.kernel.org/r/159870621515.1229682.15506193091065001742.stgit@devnote2 --- include/linux/atomic-arch-fallback.h | 90 ++++++++++++++++++++++++++++++++---- include/linux/atomic-fallback.h | 90 ++++++++++++++++++++++++++++++++---- 2 files changed, 160 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic-arch-fallback.h index bcb6aa27cfa6..a3dba31df01e 100644 --- a/include/linux/atomic-arch-fallback.h +++ b/include/linux/atomic-arch-fallback.h @@ -9,9 +9,9 @@ #include #ifndef arch_xchg_relaxed -#define arch_xchg_relaxed arch_xchg -#define arch_xchg_acquire arch_xchg -#define arch_xchg_release arch_xchg +#define arch_xchg_acquire arch_xchg +#define arch_xchg_release arch_xchg +#define arch_xchg_relaxed arch_xchg #else /* arch_xchg_relaxed */ #ifndef arch_xchg_acquire @@ -32,9 +32,9 @@ #endif /* arch_xchg_relaxed */ #ifndef arch_cmpxchg_relaxed -#define arch_cmpxchg_relaxed arch_cmpxchg -#define arch_cmpxchg_acquire arch_cmpxchg -#define arch_cmpxchg_release arch_cmpxchg +#define arch_cmpxchg_acquire arch_cmpxchg +#define arch_cmpxchg_release arch_cmpxchg +#define arch_cmpxchg_relaxed arch_cmpxchg #else /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg_acquire @@ -55,9 +55,9 @@ #endif /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg64_relaxed -#define arch_cmpxchg64_relaxed arch_cmpxchg64 -#define arch_cmpxchg64_acquire arch_cmpxchg64 -#define arch_cmpxchg64_release arch_cmpxchg64 +#define arch_cmpxchg64_acquire arch_cmpxchg64 +#define arch_cmpxchg64_release arch_cmpxchg64 +#define arch_cmpxchg64_relaxed arch_cmpxchg64 #else /* arch_cmpxchg64_relaxed */ #ifndef arch_cmpxchg64_acquire @@ -77,6 +77,76 @@ #endif /* arch_cmpxchg64_relaxed */ +#ifndef arch_try_cmpxchg_relaxed +#ifdef arch_try_cmpxchg +#define arch_try_cmpxchg_acquire arch_try_cmpxchg +#define arch_try_cmpxchg_release arch_try_cmpxchg +#define arch_try_cmpxchg_relaxed arch_try_cmpxchg +#endif /* arch_try_cmpxchg */ + +#ifndef arch_try_cmpxchg +#define arch_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg */ + +#ifndef arch_try_cmpxchg_acquire +#define arch_try_cmpxchg_acquire(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_acquire((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_acquire */ + +#ifndef arch_try_cmpxchg_release +#define arch_try_cmpxchg_release(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_release((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_release */ + +#ifndef arch_try_cmpxchg_relaxed +#define arch_try_cmpxchg_relaxed(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_relaxed((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_relaxed */ + +#else /* arch_try_cmpxchg_relaxed */ + +#ifndef arch_try_cmpxchg_acquire +#define arch_try_cmpxchg_acquire(...) \ + __atomic_op_acquire(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg_release +#define arch_try_cmpxchg_release(...) \ + __atomic_op_release(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg +#define arch_try_cmpxchg(...) \ + __atomic_op_fence(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#endif /* arch_try_cmpxchg_relaxed */ + #ifndef arch_atomic_read_acquire static __always_inline int arch_atomic_read_acquire(const atomic_t *v) @@ -2288,4 +2358,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 90cd26cfd69d2250303d654955a0cc12620fb91b +// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h index fd525c71d676..2a3f55d98be9 100644 --- a/include/linux/atomic-fallback.h +++ b/include/linux/atomic-fallback.h @@ -9,9 +9,9 @@ #include #ifndef xchg_relaxed -#define xchg_relaxed xchg -#define xchg_acquire xchg -#define xchg_release xchg +#define xchg_acquire xchg +#define xchg_release xchg +#define xchg_relaxed xchg #else /* xchg_relaxed */ #ifndef xchg_acquire @@ -32,9 +32,9 @@ #endif /* xchg_relaxed */ #ifndef cmpxchg_relaxed -#define cmpxchg_relaxed cmpxchg -#define cmpxchg_acquire cmpxchg -#define cmpxchg_release cmpxchg +#define cmpxchg_acquire cmpxchg +#define cmpxchg_release cmpxchg +#define cmpxchg_relaxed cmpxchg #else /* cmpxchg_relaxed */ #ifndef cmpxchg_acquire @@ -55,9 +55,9 @@ #endif /* cmpxchg_relaxed */ #ifndef cmpxchg64_relaxed -#define cmpxchg64_relaxed cmpxchg64 -#define cmpxchg64_acquire cmpxchg64 -#define cmpxchg64_release cmpxchg64 +#define cmpxchg64_acquire cmpxchg64 +#define cmpxchg64_release cmpxchg64 +#define cmpxchg64_relaxed cmpxchg64 #else /* cmpxchg64_relaxed */ #ifndef cmpxchg64_acquire @@ -77,6 +77,76 @@ #endif /* cmpxchg64_relaxed */ +#ifndef try_cmpxchg_relaxed +#ifdef try_cmpxchg +#define try_cmpxchg_acquire try_cmpxchg +#define try_cmpxchg_release try_cmpxchg +#define try_cmpxchg_relaxed try_cmpxchg +#endif /* try_cmpxchg */ + +#ifndef try_cmpxchg +#define try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg */ + +#ifndef try_cmpxchg_acquire +#define try_cmpxchg_acquire(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_acquire((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_acquire */ + +#ifndef try_cmpxchg_release +#define try_cmpxchg_release(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_release((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_release */ + +#ifndef try_cmpxchg_relaxed +#define try_cmpxchg_relaxed(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_relaxed((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_relaxed */ + +#else /* try_cmpxchg_relaxed */ + +#ifndef try_cmpxchg_acquire +#define try_cmpxchg_acquire(...) \ + __atomic_op_acquire(try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef try_cmpxchg_release +#define try_cmpxchg_release(...) \ + __atomic_op_release(try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef try_cmpxchg +#define try_cmpxchg(...) \ + __atomic_op_fence(try_cmpxchg, __VA_ARGS__) +#endif + +#endif /* try_cmpxchg_relaxed */ + #define arch_atomic_read atomic_read #define arch_atomic_read_acquire atomic_read_acquire @@ -2522,4 +2592,4 @@ atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f +// d78e6c293c661c15188f0ec05bce45188c8d5892 -- cgit v1.2.3 From e563604a5f5a891283b6a8db4001cee833a7c6b8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:46 +0900 Subject: freelist: Implement lockless freelist A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly speedy under low contention. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870622579.1229682.16729440870040944993.stgit@devnote2 --- include/linux/freelist.h | 129 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 include/linux/freelist.h (limited to 'include/linux') diff --git a/include/linux/freelist.h b/include/linux/freelist.h new file mode 100644 index 000000000000..fc1842b96469 --- /dev/null +++ b/include/linux/freelist.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +#ifndef FREELIST_H +#define FREELIST_H + +#include + +/* + * Copyright: cameron@moodycamel.com + * + * A simple CAS-based lock-free free list. Not the fastest thing in the world + * under heavy contention, but simple and correct (assuming nodes are never + * freed until after the free list is destroyed), and fairly speedy under low + * contention. + * + * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists + */ + +struct freelist_node { + atomic_t refs; + struct freelist_node *next; +}; + +struct freelist_head { + struct freelist_node *head; +}; + +#define REFS_ON_FREELIST 0x80000000 +#define REFS_MASK 0x7FFFFFFF + +static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list) +{ + /* + * Since the refcount is zero, and nobody can increase it once it's + * zero (except us, and we run only one copy of this method per node at + * a time, i.e. the single thread case), then we know we can safely + * change the next pointer of the node; however, once the refcount is + * back above zero, then other threads could increase it (happens under + * heavy contention, when the refcount goes to zero in between a load + * and a refcount increment of a node in try_get, then back up to + * something non-zero, then the refcount increment is done by the other + * thread) -- so if the CAS to add the node to the actual list fails, + * decrese the refcount and leave the add operation to the next thread + * who puts the refcount back to zero (which could be us, hence the + * loop). + */ + struct freelist_node *head = READ_ONCE(list->head); + + for (;;) { + WRITE_ONCE(node->next, head); + atomic_set_release(&node->refs, 1); + + if (!try_cmpxchg_release(&list->head, &head, node)) { + /* + * Hmm, the add failed, but we can only try again when + * the refcount goes back to zero. + */ + if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1) + continue; + } + return; + } +} + +static inline void freelist_add(struct freelist_node *node, struct freelist_head *list) +{ + /* + * We know that the should-be-on-freelist bit is 0 at this point, so + * it's safe to set it using a fetch_add. + */ + if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) { + /* + * Oh look! We were the last ones referencing this node, and we + * know we want to add it to the free list, so let's do it! + */ + __freelist_add(node, list); + } +} + +static inline struct freelist_node *freelist_try_get(struct freelist_head *list) +{ + struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head); + unsigned int refs; + + while (head) { + prev = head; + refs = atomic_read(&head->refs); + if ((refs & REFS_MASK) == 0 || + !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) { + head = smp_load_acquire(&list->head); + continue; + } + + /* + * Good, reference count has been incremented (it wasn't at + * zero), which means we can read the next and not worry about + * it changing between now and the time we do the CAS. + */ + next = READ_ONCE(head->next); + if (try_cmpxchg_acquire(&list->head, &head, next)) { + /* + * Yay, got the node. This means it was on the list, + * which means should-be-on-freelist must be false no + * matter the refcount (because nobody else knows it's + * been taken off yet, it can't have been put back on). + */ + WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST); + + /* + * Decrease refcount twice, once for our ref, and once + * for the list's ref. + */ + atomic_fetch_add(-2, &head->refs); + + return head; + } + + /* + * OK, the head must have changed on us, but we still need to decrement + * the refcount we increased. + */ + refs = atomic_fetch_add(-1, &prev->refs); + if (refs == REFS_ON_FREELIST + 1) + __freelist_add(prev, list); + } + + return NULL; +} + +#endif /* FREELIST_H */ -- cgit v1.2.3 From 6e426e0fcd20ce144bb93e00b70df51e9f2e08c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Aug 2020 22:03:56 +0900 Subject: kprobes: Replace rp->free_instance with freelist Gets rid of rp->lock, and as a result kretprobes are now fully lockless. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870623583.1229682.17472357584134058687.stgit@devnote2 --- include/linux/kprobes.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 00cf4421efd5..b7824e3f1ef5 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #ifdef CONFIG_KPROBES @@ -157,17 +158,16 @@ struct kretprobe { int maxactive; int nmissed; size_t data_size; - struct hlist_head free_instances; + struct freelist_head freelist; struct kretprobe_holder *rph; - raw_spinlock_t lock; }; struct kretprobe_instance { union { - struct llist_node llist; - struct hlist_node hlist; + struct freelist_node freelist; struct rcu_head rcu; }; + struct llist_node llist; struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; void *fp; -- cgit v1.2.3 From 9b80e4c4ddaca3501177ed41e49d0928ba2122a8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Aug 2020 14:47:03 -0700 Subject: overflow: Add __must_check attribute to check_*() helpers Since the destination variable of the check_*_overflow() helpers will contain a wrapped value on failure, it would be best to make sure callers really did check the return result of the helper. Adjust the macros to use a bool-wrapping static inline that is marked with __must_check. This means the macros can continue to have their type-agnostic behavior while gaining the function attribute (that cannot be applied directly to macros). Suggested-by: Rasmus Villemoes Link: https://lore.kernel.org/lkml/202008151007.EF679DF@keescook/ Signed-off-by: Kees Cook --- include/linux/overflow.h | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 93fcef105061..f1c4e7b56bd9 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -43,6 +43,16 @@ #define is_non_negative(a) ((a) > 0 || (a) == 0) #define is_negative(a) (!(is_non_negative(a))) +/* + * Allows for effectively applying __must_check to a macro so we can have + * both the type-agnostic benefits of the macros while also being able to + * enforce that the return value is, in fact, checked. + */ +static inline bool __must_check __must_check_overflow(bool overflow) +{ + return unlikely(overflow); +} + #ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on @@ -52,32 +62,32 @@ * alias for __builtin_add_overflow, but add type checks similar to * below. */ -#define check_add_overflow(a, b, d) ({ \ +#define check_add_overflow(a, b, d) __must_check_overflow(({ \ typeof(a) __a = (a); \ typeof(b) __b = (b); \ typeof(d) __d = (d); \ (void) (&__a == &__b); \ (void) (&__a == __d); \ __builtin_add_overflow(__a, __b, __d); \ -}) +})) -#define check_sub_overflow(a, b, d) ({ \ +#define check_sub_overflow(a, b, d) __must_check_overflow(({ \ typeof(a) __a = (a); \ typeof(b) __b = (b); \ typeof(d) __d = (d); \ (void) (&__a == &__b); \ (void) (&__a == __d); \ __builtin_sub_overflow(__a, __b, __d); \ -}) +})) -#define check_mul_overflow(a, b, d) ({ \ +#define check_mul_overflow(a, b, d) __must_check_overflow(({ \ typeof(a) __a = (a); \ typeof(b) __b = (b); \ typeof(d) __d = (d); \ (void) (&__a == &__b); \ (void) (&__a == __d); \ __builtin_mul_overflow(__a, __b, __d); \ -}) +})) #else @@ -190,21 +200,20 @@ }) -#define check_add_overflow(a, b, d) \ +#define check_add_overflow(a, b, d) __must_check_overflow( \ __builtin_choose_expr(is_signed_type(typeof(a)), \ __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d)) + __unsigned_add_overflow(a, b, d))) -#define check_sub_overflow(a, b, d) \ +#define check_sub_overflow(a, b, d) __must_check_overflow( \ __builtin_choose_expr(is_signed_type(typeof(a)), \ __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d)) + __unsigned_sub_overflow(a, b, d))) -#define check_mul_overflow(a, b, d) \ +#define check_mul_overflow(a, b, d) __must_check_overflow( \ __builtin_choose_expr(is_signed_type(typeof(a)), \ __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d)) - + __unsigned_mul_overflow(a, b, d))) #endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ @@ -227,7 +236,7 @@ * '*d' will hold the results of the attempted shift, but is not * considered "safe for use" if false is returned. */ -#define check_shl_overflow(a, s, d) ({ \ +#define check_shl_overflow(a, s, d) __must_check_overflow(({ \ typeof(a) _a = a; \ typeof(s) _s = s; \ typeof(d) _d = d; \ @@ -237,7 +246,7 @@ *_d = (_a_full << _to_shift); \ (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ (*_d >> _to_shift) != _a); \ -}) +})) /** * array_size() - Calculate size of 2-dimensional array. -- cgit v1.2.3 From ee92e4f1f95eb7b8820299f10fc5fba16d85cece Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 8 Apr 2020 14:47:39 -0500 Subject: net/mlx5: Add NIC TX domain namespace Add new namespace that represents the NIC TX domain. Signed-off-by: Huy Nguyen Signed-off-by: Raed Salem Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 92d991d93757..846d94ad04bc 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -76,6 +76,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_SNIFFER_RX, MLX5_FLOW_NAMESPACE_SNIFFER_TX, MLX5_FLOW_NAMESPACE_EGRESS, + MLX5_FLOW_NAMESPACE_EGRESS_KERNEL, MLX5_FLOW_NAMESPACE_RDMA_RX, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL, MLX5_FLOW_NAMESPACE_RDMA_TX, -- cgit v1.2.3 From 9b9d454ddbf0c41391ed68ea82bc3d8ff6a65074 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 5 Jun 2020 20:17:51 -0500 Subject: net/mlx5e: IPsec: Add TX steering rule per IPsec state Add new FTE in TX IPsec FT per IPsec state. It has the same matching criteria as the RX steering rule. The IPsec FT is created/destroyed when the first/last rule is added/deleted respectively. Signed-off-by: Huy Nguyen Reviewed-by: Boris Pismenny Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/qp.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 36492a1342cf..d75ef8aa8fac 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -245,6 +245,10 @@ enum { MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5, }; +enum { + MLX5_ETH_WQE_FT_META_IPSEC = BIT(0), +}; + struct mlx5_wqe_eth_seg { u8 swp_outer_l4_offset; u8 swp_outer_l3_offset; @@ -253,7 +257,7 @@ struct mlx5_wqe_eth_seg { u8 cs_flags; u8 swp_flags; __be16 mss; - __be32 rsvd2; + __be32 flow_table_metadata; union { struct { __be16 sz; -- cgit v1.2.3 From 3528f8ec95a5b1ee1b98d3e85371843c6428e4be Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 15 Jul 2020 18:28:58 -0700 Subject: bcm963xx_tag.h: fix duplicated word Change doubled word "is" to "it is". Signed-off-by: Randy Dunlap Cc: Florian Fainelli Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-mips@vger.kernel.org Acked-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- include/linux/bcm963xx_tag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bcm963xx_tag.h b/include/linux/bcm963xx_tag.h index b87945cb6946..7edb809a2586 100644 --- a/include/linux/bcm963xx_tag.h +++ b/include/linux/bcm963xx_tag.h @@ -84,7 +84,7 @@ struct bcm_tag { char flash_layout_ver[FLASHLAYOUTVER_LEN]; /* 196-199: kernel+rootfs CRC32 */ __u32 fskernel_crc; - /* 200-215: Unused except on Alice Gate where is is information */ + /* 200-215: Unused except on Alice Gate where it is information */ char information2[TAGINFO2_LEN]; /* 216-219: CRC32 of image less imagetag (kernel for Alice Gate) */ __u32 image_crc; -- cgit v1.2.3 From f82cd2f0b5eb715b1a296e20b34da7d296b6e9a4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 18 Aug 2020 09:05:56 -0400 Subject: XArray: Add private interface for workingset node deletion Move the tricky bits of dealing with the XArray from the workingset code to the XArray. Make it clear in the documentation that this is a private interface, and only export it for the benefit of the test suite. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 6b336098fca7..29db4e16eb89 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1286,6 +1286,8 @@ static inline bool xa_is_advanced(const void *entry) */ typedef void (*xa_update_node_t)(struct xa_node *node); +void xa_delete_node(struct xa_node *, xa_update_node_t); + /* * The xa_state is opaque to its users. It contains various different pieces * of state involved in the current operation on the XArray. It should be -- cgit v1.2.3 From ca7b639e8611b3260a30b18aaa0d6db9c80a75ef Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 2 Aug 2020 14:17:21 -0400 Subject: XArray: Fix xas_reload for multi-index entries xas_reload() was only checking that the head entry was still at the head index. If the entry has been split, that's not enough as there may be a different entry at the specified index now. Solve this by checking the slot for the requested index instead of the head index. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 29db4e16eb89..4be9c57132fe 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1524,10 +1524,21 @@ void xas_create_range(struct xa_state *); static inline void *xas_reload(struct xa_state *xas) { struct xa_node *node = xas->xa_node; - - if (node) - return xa_entry(xas->xa, node, xas->xa_offset); - return xa_head(xas->xa); + void *entry; + char offset; + + if (!node) + return xa_head(xas->xa); + if (IS_ENABLED(CONFIG_XARRAY_MULTI)) { + offset = (xas->xa_index >> node->shift) & XA_CHUNK_MASK; + entry = xa_entry(xas->xa, node, offset); + if (!xa_is_sibling(entry)) + return entry; + offset = xa_to_sibling(entry); + } else { + offset = xas->xa_offset; + } + return xa_entry(xas->xa, node, offset); } /** -- cgit v1.2.3 From f78b8250a076ac63ddd021c7ea9739bcc2f6f737 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Mon, 28 Sep 2020 01:15:53 +0800 Subject: radix-tree: fix the comment of radix_tree_next_slot() fix the comment of radix_tree_next_slot(): interator --> iterator. Signed-off-by: Hui Su Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5c85059a92ba..64ad900ac742 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -377,7 +377,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) * radix_tree_next_slot - find next slot in chunk * * @slot: pointer to current slot - * @iter: pointer to interator state + * @iter: pointer to iterator state * @flags: RADIX_TREE_ITER_*, should be constant * Returns: pointer to next slot, or NULL if there no more left * -- cgit v1.2.3 From 8bca49e43fb5994bd2a03f3f114cf89bc6f87a14 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Apr 2020 15:51:55 +0300 Subject: drm: shmobile: Reduce include dependencies This file doesn't need anything provided by . All it needs are some types, which are provided by . Drop unneeded completely. Signed-off-by: Andy Shevchenko Reviewed-by: Laurent Pinchart Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20200422125201.37618-1-andriy.shevchenko@linux.intel.com --- include/linux/platform_data/shmob_drm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index fe815d7d9f58..d661399b217d 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -10,8 +10,6 @@ #ifndef __SHMOB_DRM_H__ #define __SHMOB_DRM_H__ -#include - #include enum shmob_drm_clk_source { -- cgit v1.2.3 From 3b481d91356e5693d8358d4ef9c383bdb92c8da0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 Sep 2020 13:53:28 -0700 Subject: block: add zone specific block statuses A zoned device with limited resources to open or activate zones may return an error when the host exceeds those limits. The same command may be successful if retried later, but the host needs to wait for specific zone states before it should expect a retry to succeed. Have the block layer provide an appropriate status for these conditions so applications can distinuguish this error for special handling. Cc: linux-api@vger.kernel.org Cc: Niklas Cassel Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7d7c13238fdb..d9b69bbde5cc 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -104,6 +104,24 @@ typedef u8 __bitwise blk_status_t; */ #define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) +/* + * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion + * path if the device returns a status indicating that too many zone resources + * are currently open. The same command should be successful if resubmitted + * after the number of open zones decreases below the device's limits, which is + * reported in the request_queue's max_open_zones. + */ +#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15) + +/* + * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion + * path if the device returns a status indicating that too many zone resources + * are currently active. The same command should be successful if resubmitted + * after the number of active zones decreases below the device's limits, which + * is reported in the request_queue's max_active_zones. + */ +#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with -- cgit v1.2.3 From bf41a0910cb2dd06abd4d6a920edcee798460ad7 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Fri, 2 Oct 2020 11:09:02 -0700 Subject: remoteproc: Change default dump configuration to "disabled" Currently "default" configuration option means coredumps are enabled. To avoid confusion rename the "default" configuration option to "enabled" and disable collection of dumps by default as doing so makes sense for production devices. Signed-off-by: Rishabh Bhatnagar Link: https://lore.kernel.org/r/1601662144-5964-2-git-send-email-rishabhb@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 2fa68bf5aa4f..3fa3ba6498e8 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -442,16 +442,16 @@ enum rproc_crash_type { /** * enum rproc_dump_mechanism - Coredump options for core - * @RPROC_COREDUMP_DEFAULT: Copy dump to separate buffer and carry on with + * @RPROC_COREDUMP_DISABLED: Don't perform any dump + * @RPROC_COREDUMP_ENABLED: Copy dump to separate buffer and carry on with recovery * @RPROC_COREDUMP_INLINE: Read segments directly from device memory. Stall recovery until all segments are read - * @RPROC_COREDUMP_DISABLED: Don't perform any dump */ enum rproc_dump_mechanism { - RPROC_COREDUMP_DEFAULT, - RPROC_COREDUMP_INLINE, RPROC_COREDUMP_DISABLED, + RPROC_COREDUMP_ENABLED, + RPROC_COREDUMP_INLINE, }; /** -- cgit v1.2.3 From 44fa32f008ab7092842095a7a474c48303a2f186 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 12 Oct 2020 10:01:27 +0200 Subject: net: add function dev_fetch_sw_netstats for fetching pcpu_sw_netstats In several places the same code is used to populate rtnl_link_stats64 fields with data from pcpu_sw_netstats. Therefore factor out this code to a new function dev_fetch_sw_netstats(). v2: - constify argument netstats - don't ignore netstats being NULL or an ERRPTR - switch to EXPORT_SYMBOL_GPL Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/6d16a338-52f5-df69-0020-6bc771a7d498@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 948d7105e91a..964b494b0e8d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4499,6 +4499,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats); +void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, + const struct pcpu_sw_netstats __percpu *netstats); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; -- cgit v1.2.3 From 1f7a44f63e6c782c9c2aa9f18f40c23914e6b46a Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 13 Oct 2020 16:47:33 -0700 Subject: compiler-clang: add build check for clang 10.0.1 Patch series "set clang minimum version to 10.0.1", v3. Adds a compile time #error to compiler-clang.h setting the effective minimum supported version to clang 10.0.1. A separate patch has already been picked up into the Documentation/ tree also confirming the version. Next are a series of reverts. One for 32b arm is a partial revert. Then Marco suggested fixes to KASAN docs. Finally, improve the warning for GCC too as per Kees. This patch (of 7): During Plumbers 2020, we voted to just support the latest release of Clang for now. Add a compile time check for this. We plan to remove workarounds for older versions now, which will break in subtle and not so subtle ways. Suggested-by: Sedat Dilek Suggested-by: Nathan Chancellor Suggested-by: Kees Cook Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Sedat Dilek Reviewed-by: Kees Cook Reviewed-by: Miguel Ojeda Reviewed-by: Sedat Dilek Acked-by: Marco Elver Acked-by: Nathan Chancellor Acked-by: Sedat Dilek Cc: Andrey Konovalov Cc: Fangrui Song Cc: Masahiro Yamada Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Will Deacon Cc: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200902225911.209899-1-ndesaulniers@google.com Link: https://lkml.kernel.org/r/20200902225911.209899-2-ndesaulniers@google.com Link: https://github.com/ClangBuiltLinux/linux/issues/9 Link: https://github.com/ClangBuiltLinux/linux/issues/941 Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index cee0c728d39a..230604e7f057 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -3,6 +3,14 @@ #error "Please don't include directly, include instead." #endif +#define CLANG_VERSION (__clang_major__ * 10000 \ + + __clang_minor__ * 100 \ + + __clang_patchlevel__) + +#if CLANG_VERSION < 100001 +# error Sorry, your version of Clang is too old - please use 10.0.1 or newer. +#endif + /* Compiler specific definitions for Clang compiler */ /* same as gcc, this was present in clang-2.6 so we can assume it works -- cgit v1.2.3 From c8db3b0a7ba7614f761f309d6aa7499127b18a0b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 13 Oct 2020 16:47:55 -0700 Subject: compiler-gcc: improve version error As Kees suggests, doing so provides developers with two useful pieces of information: - The kernel build was attempting to use GCC. (Maybe they accidentally poked the wrong configs in a CI.) - They need 4.9 or better. ("Upgrade to what version?" doesn't need to be dug out of documentation, headers, etc.) Suggested-by: Kees Cook Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Sedat Dilek Reviewed-by: Kees Cook Reviewed-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Reviewed-by: Sedat Dilek Cc: Andrey Konovalov Cc: Fangrui Song Cc: Marco Elver Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Masahiro Yamada Cc: Vincenzo Frascino Cc: Will Deacon Link: https://lkml.kernel.org/r/20200902225911.209899-8-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7a3769040d7d..d1e3c6896b71 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -12,7 +12,7 @@ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ #if GCC_VERSION < 40900 -# error Sorry, your compiler is too old - please upgrade it. +# error Sorry, your version of GCC is too old - please use 4.9 or newer. #endif /* Optimization barrier */ -- cgit v1.2.3 From a25c13b3aa1bdbf100e8770902c30908728f8410 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 13 Oct 2020 16:47:58 -0700 Subject: compiler.h: avoid escaped section names The stringification operator, `#`, in the preprocessor escapes strings. For example, `# "foo"` becomes `"\"foo\""`. GCC and Clang differ in how they treat section names that contain \". The portable solution is to not use a string literal with the preprocessor stringification operator. In this case, since __section unconditionally uses the stringification operator, we actually want the more verbose __attribute__((__section__())). Fixes: commit e04462fb82f8 ("Compiler Attributes: remove uses of __attribute__ from compiler.h") Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Cc: Miguel Ojeda Cc: Luc Van Oostenryck Cc: Nathan Chancellor Cc: Arvind Sankar Link: https://bugs.llvm.org/show_bug.cgi?id=42950 Link: https://lkml.kernel.org/r/20200929194318.548707-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 92ef163a7479..ac45f6d40d39 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -155,7 +155,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, extern typeof(sym) sym; \ static const unsigned long __kentry_##sym \ __used \ - __section("___kentry" "+" #sym ) \ + __attribute__((__section__("___kentry+" #sym))) \ = (unsigned long)&sym; #endif -- cgit v1.2.3 From 4d6fb34acb5d0bfc579ccd29df9cc6f653e51ab2 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 13 Oct 2020 16:48:01 -0700 Subject: export.h: fix section name for CONFIG_TRIM_UNUSED_KSYMS for Clang When enabling CONFIG_TRIM_UNUSED_KSYMS, the linker will warn about the orphan sections: (".discard.ksym") is being placed in '".discard.ksym"' repeatedly when linking vmlinux. This is because the stringification operator, `#`, in the preprocessor escapes strings. GCC and Clang differ in how they treat section names that contain \". The portable solution is to not use a string literal with the preprocessor stringification operator. Fixes: commit bbda5ec671d3 ("kbuild: simplify dependency generation for CONFIG_TRIM_UNUSED_KSYMS") Reported-by: kbuild test robot Suggested-by: Kees Cook Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Cc: Nathan Chancellor Cc: Masahiro Yamada Cc: Matthias Maennich Cc: Jessica Yu Cc: Greg Kroah-Hartman Cc: Will Deacon Link: https://bugs.llvm.org/show_bug.cgi?id=42950 Link: https://github.com/ClangBuiltLinux/linux/issues/1166 Link: https://lkml.kernel.org/r/20200929190701.398762-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/export.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index fceb5e855717..8933ff6ad23a 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -130,7 +130,7 @@ struct kernel_symbol { * discarded in the final link stage. */ #define __ksym_marker(sym) \ - static int __ksym_marker_##sym[0] __section(".discard.ksym") __used + static int __ksym_marker_##sym[0] __section(.discard.ksym) __used #define __EXPORT_SYMBOL(sym, sec, ns) \ __ksym_marker(sym); \ -- cgit v1.2.3 From d7cff4ded857ff7cc3e49eb39cc14df9345b9662 Mon Sep 17 00:00:00 2001 From: tangjianqiang Date: Tue, 13 Oct 2020 16:48:37 -0700 Subject: include/linux/slab.h: fix a typo error in comment fix a typo error in slab.h "allocagtor" -> "allocator" Signed-off-by: tangjianqiang Signed-off-by: Andrew Morton Acked-by: Souptick Joarder Link: https://lkml.kernel.org/r/1600230053-24303-1-git-send-email-tangjianqiang@xiaomi.com Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 24df2393ec03..9e155cc83b8a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -279,7 +279,7 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) /* Maximum size for which we actually use a slab cache */ #define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH) -/* Maximum order allocatable via the slab allocagtor */ +/* Maximum order allocatable via the slab allocator */ #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT) /* -- cgit v1.2.3 From 3b0d31011d39759e3ba7214f75f77bb31983b5a4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:49:02 -0700 Subject: x86/numa: add 'nohmat' option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable parsing of the HMAT for debug, to workaround broken platform instances, or cases where it is otherwise not wanted. [rdunlap@infradead.org: fix build when CONFIG_ACPI is not set] Link: https://lkml.kernel.org/r/70e5ee34-9809-a997-7b49-499e4be61307@infradead.org Signed-off-by: Dan Williams Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Ben Skeggs Cc: Brice Goglin Cc: Catalin Marinas Cc: Daniel Vetter Cc: Dave Jiang Cc: David Airlie Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Jia He Cc: Joao Martins Cc: Jonathan Cameron Cc: Michael Ellerman Cc: Mike Rapoport Cc: Paul Mackerras Cc: Pavel Tatashin Cc: "Rafael J. Wysocki" Cc: Tom Lendacky Cc: Vishal Verma Cc: Wei Yang Cc: Will Deacon Cc: Bjorn Helgaas Cc: Boris Ostrovsky Cc: Hulk Robot Cc: Jason Yan Cc: "Jérôme Glisse" Cc: Juergen Gross Cc: kernel test robot Cc: Randy Dunlap Cc: Stefano Stabellini Cc: Vivek Goyal Link: https://lkml.kernel.org/r/159643095540.4062302.732962081968036212.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 64ae25c59d55..cfa8c0015863 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -709,6 +709,8 @@ static inline u64 acpi_arch_get_root_pointer(void) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), +#include + struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) -- cgit v1.2.3 From c01044cc819160323f3ca4acd44fca487c4432e6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:49:13 -0700 Subject: ACPI: HMAT: refactor hmat_register_target_device to hmem_register_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for exposing "Soft Reserved" memory ranges without an HMAT, move the hmem device registration to its own compilation unit and make the implementation generic. The generic implementation drops usage acpi_map_pxm_to_online_node() that was translating ACPI proximity domain values and instead relies on numa_map_to_online_node() to determine the numa node for the device. [joao.m.martins@oracle.com: CONFIG_DEV_DAX_HMEM_DEVICES should depend on CONFIG_DAX=y] Link: https://lkml.kernel.org/r/8f34727f-ec2d-9395-cb18-969ec8a5d0d4@oracle.com Signed-off-by: Dan Williams Signed-off-by: Joao Martins Signed-off-by: Andrew Morton Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Ben Skeggs Cc: Borislav Petkov Cc: Brice Goglin Cc: Catalin Marinas Cc: Daniel Vetter Cc: Dave Hansen Cc: Dave Jiang Cc: David Airlie Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Jia He Cc: Joao Martins Cc: Jonathan Cameron Cc: Michael Ellerman Cc: Mike Rapoport Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Vishal Verma Cc: Wei Yang Cc: Will Deacon Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Bjorn Helgaas Cc: Boris Ostrovsky Cc: Hulk Robot Cc: Jason Yan Cc: "Jérôme Glisse" Cc: Juergen Gross Cc: kernel test robot Cc: Randy Dunlap Cc: Stefano Stabellini Cc: Vivek Goyal Link: https://lkml.kernel.org/r/159643096584.4062302.5035370788475153738.stgit@dwillia2-desk3.amr.corp.intel.com Link: https://lore.kernel.org/r/158318761484.2216124.2049322072599482736.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/dax.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 43b39ab9de1a..4ec0bbf86205 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -238,4 +238,12 @@ static inline bool dax_mapping(struct address_space *mapping) return mapping->host && IS_DAX(mapping->host); } +#ifdef CONFIG_DEV_DAX_HMEM_DEVICES +void hmem_register_device(int target_nid, struct resource *r); +#else +static inline void hmem_register_device(int target_nid, struct resource *r) +{ +} +#endif + #endif -- cgit v1.2.3 From a035b6bf863e5c42c2746de2a8ed6600140307e7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:49:23 -0700 Subject: mm/memory_hotplug: introduce default phys_to_target_node() implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to set a fallback value for dev_dax->target_node, introduce generic fallback helpers for phys_to_target_node() A generic implementation based on node-data or memblock was proposed, but as noted by Mike: "Here again, I would prefer to add a weak default for phys_to_target_node() because the "generic" implementation is not really generic. The fallback to reserved ranges is x86 specfic because on x86 most of the reserved areas is not in memblock.memory. AFAIK, no other architecture does this." The info message in the generic memory_add_physaddr_to_nid() implementation is fixed up to properly reflect that memory_add_physaddr_to_nid() communicates "online" node info and phys_to_target_node() indicates "target / to-be-onlined" node info. [akpm@linux-foundation.org: fix CONFIG_MEMORY_HOTPLUG=n build] Link: https://lkml.kernel.org/r/202008252130.7YrHIyMI%25lkp@intel.com Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Cc: David Hildenbrand Cc: Mike Rapoport Cc: Jia He Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Ben Skeggs Cc: Borislav Petkov Cc: Brice Goglin Cc: Catalin Marinas Cc: Daniel Vetter Cc: Dave Hansen Cc: Dave Jiang Cc: David Airlie Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joao Martins Cc: Jonathan Cameron Cc: Michael Ellerman Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Vishal Verma Cc: Wei Yang Cc: Will Deacon Cc: Ard Biesheuvel Cc: Bjorn Helgaas Cc: Boris Ostrovsky Cc: Hulk Robot Cc: Jason Yan Cc: "Jérôme Glisse" Cc: Juergen Gross Cc: kernel test robot Cc: Randy Dunlap Cc: Stefano Stabellini Cc: Vivek Goyal Link: https://lkml.kernel.org/r/159643097768.4062302.3135192588966888630.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 23 ++++++++++++++--------- include/linux/numa.h | 11 ----------- 2 files changed, 14 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 375515803cd8..c0faa7a30c46 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -149,15 +149,6 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -#endif - #ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION /* * For supporting node-hotadd, we have to allocate a new pgdat. @@ -284,6 +275,20 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ +#ifdef CONFIG_NUMA +extern int memory_add_physaddr_to_nid(u64 start); +extern int phys_to_target_node(u64 start); +#else +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} +#endif + #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/numa.h b/include/linux/numa.h index a42df804679e..8cb33ccfb671 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -23,22 +23,11 @@ #ifdef CONFIG_NUMA /* Generic implementation available */ int numa_map_to_online_node(int node); - -/* - * Optional architecture specific implementation, users need a "depends - * on $ARCH" - */ -int phys_to_target_node(phys_addr_t addr); #else static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } - -static inline int phys_to_target_node(phys_addr_t addr) -{ - return NUMA_NO_NODE; -} #endif #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From a4574f63edc6f76fb46dcd65d3eb4d5a8e23ba38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:50:29 -0700 Subject: mm/memremap_pages: convert to 'struct range' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'struct resource' in 'struct dev_pagemap' is only used for holding resource span information. The other fields, 'name', 'flags', 'desc', 'parent', 'sibling', and 'child' are all unused wasted space. This is in preparation for introducing a multi-range extension of devm_memremap_pages(). The bulk of this change is unwinding all the places internal to libnvdimm that used 'struct resource' unnecessarily, and replacing instances of 'struct dev_pagemap'.res with 'struct dev_pagemap'.range. P2PDMA had a minor usage of the resource flags field, but only to report failures with "%pR". That is replaced with an open coded print of the range. [dan.carpenter@oracle.com: mm/hmm/test: use after free in dmirror_allocate_chunk()] Link: https://lkml.kernel.org/r/20200926121402.GA7467@kadam Signed-off-by: Dan Williams Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Reviewed-by: Boris Ostrovsky [xen] Cc: Paul Mackerras Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Vishal Verma Cc: Vivek Goyal Cc: Dave Jiang Cc: Ben Skeggs Cc: David Airlie Cc: Daniel Vetter Cc: Ira Weiny Cc: Bjorn Helgaas Cc: Juergen Gross Cc: Stefano Stabellini Cc: "Jérôme Glisse" Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brice Goglin Cc: Catalin Marinas Cc: Dave Hansen Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Hulk Robot Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Jason Yan Cc: Jeff Moyer Cc: Jia He Cc: Joao Martins Cc: Jonathan Cameron Cc: kernel test robot Cc: Mike Rapoport Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Randy Dunlap Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Wei Yang Cc: Will Deacon Link: https://lkml.kernel.org/r/159643103173.4062302.768998885691711532.stgit@dwillia2-desk3.amr.corp.intel.com Link: https://lkml.kernel.org/r/160106115761.30709.13539840236873663620.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/memremap.h | 5 +++-- include/linux/range.h | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index e5862746751b..d0dd261d87c0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ +#include #include #include @@ -93,7 +94,7 @@ struct dev_pagemap_ops { /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations - * @res: physical address range covered by @ref + * @range: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref @@ -106,7 +107,7 @@ struct dev_pagemap_ops { */ struct dev_pagemap { struct vmem_altmap altmap; - struct resource res; + struct range range; struct percpu_ref *ref; struct percpu_ref internal_ref; struct completion done; diff --git a/include/linux/range.h b/include/linux/range.h index d1fbeb664012..274681cc3154 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RANGE_H #define _LINUX_RANGE_H +#include struct range { u64 start; u64 end; }; +static inline u64 range_len(const struct range *range) +{ + return range->end - range->start + 1; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); -- cgit v1.2.3 From b7b3c01b191596d27a6980d1a42504f5b607f802 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:50:34 -0700 Subject: mm/memremap_pages: support multiple ranges per invocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In support of device-dax growing the ability to front physically dis-contiguous ranges of memory, update devm_memremap_pages() to track multiple ranges with a single reference counter and devm instance. Convert all [devm_]memremap_pages() users to specify the number of ranges they are mapping in their 'struct dev_pagemap' instance. Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Cc: Paul Mackerras Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Vishal Verma Cc: Vivek Goyal Cc: Dave Jiang Cc: Ben Skeggs Cc: David Airlie Cc: Daniel Vetter Cc: Ira Weiny Cc: Bjorn Helgaas Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Cc: "Jérôme Glisse" Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brice Goglin Cc: Catalin Marinas Cc: Dave Hansen Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Hulk Robot Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Jason Yan Cc: Jeff Moyer Cc: "Jérôme Glisse" Cc: Jia He Cc: Joao Martins Cc: Jonathan Cameron Cc: kernel test robot Cc: Mike Rapoport Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Randy Dunlap Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Wei Yang Cc: Will Deacon Link: https://lkml.kernel.org/r/159643103789.4062302.18426128170217903785.stgit@dwillia2-desk3.amr.corp.intel.com Link: https://lkml.kernel.org/r/160106116293.30709.13350662794915396198.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/memremap.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index d0dd261d87c0..79c49e7f5c30 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -94,7 +94,6 @@ struct dev_pagemap_ops { /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations - * @range: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref @@ -104,10 +103,12 @@ struct dev_pagemap_ops { * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no * foreign ZONE_DEVICE memory is accessed. + * @nr_range: number of ranges to be mapped + * @range: range to be mapped when nr_range == 1 + * @ranges: array of ranges to be mapped when nr_range > 1 */ struct dev_pagemap { struct vmem_altmap altmap; - struct range range; struct percpu_ref *ref; struct percpu_ref internal_ref; struct completion done; @@ -115,6 +116,11 @@ struct dev_pagemap { unsigned int flags; const struct dev_pagemap_ops *ops; void *owner; + int nr_range; + union { + struct range range; + struct range ranges[0]; + }; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) -- cgit v1.2.3 From bac3cf4d01d43b587c873360dc8c84e3b570b344 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 13 Oct 2020 16:51:14 -0700 Subject: mm, dump_page: rename head_mapcount() --> head_compound_mapcount() Rename head_pincount() --> head_compound_pincount(). These names are more accurate (or less misleading) than the original ones. Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Cc: Qian Cai Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: Kirill A. Shutemov Cc: Mike Rapoport Cc: William Kucharski Link: https://lkml.kernel.org/r/20200807183358.105097-1-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 13dc9b9ccf8e..9cc0894e7d61 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -791,7 +791,7 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) extern void kvfree(const void *addr); extern void kvfree_sensitive(const void *addr, size_t len); -static inline int head_mapcount(struct page *head) +static inline int head_compound_mapcount(struct page *head) { return atomic_read(compound_mapcount_ptr(head)) + 1; } @@ -805,7 +805,7 @@ static inline int compound_mapcount(struct page *page) { VM_BUG_ON_PAGE(!PageCompound(page), page); page = compound_head(page); - return head_mapcount(page); + return head_compound_mapcount(page); } /* @@ -918,7 +918,7 @@ static inline bool hpage_pincount_available(struct page *page) return PageCompound(page) && compound_order(page) > 1; } -static inline int head_pincount(struct page *head) +static inline int head_compound_pincount(struct page *head) { return atomic_read(compound_pincount_ptr(head)); } @@ -927,7 +927,7 @@ static inline int compound_pincount(struct page *page) { VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); page = compound_head(page); - return head_pincount(page); + return head_compound_pincount(page); } static inline void set_compound_order(struct page *page, unsigned int order) -- cgit v1.2.3 From 61ef1865570452801f6e554a668e049c2e25c1fd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 16:51:17 -0700 Subject: mm: factor find_get_incore_page out of mincore_page Patch series "Return head pages from find_*_entry", v2. This patch series started out as part of the THP patch set, but it has some nice effects along the way and it seems worth splitting it out and submitting separately. Currently find_get_entry() and find_lock_entry() return the page corresponding to the requested index, but the first thing most callers do is find the head page, which we just threw away. As part of auditing all the callers, I found some misuses of the APIs and some plain inefficiencies that I've fixed. The diffstat is unflattering, but I added more kernel-doc and a new wrapper. This patch (of 8); Provide this functionality from the swap cache. It's useful for more than just mincore(). Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Hugh Dickins Cc: William Kucharski Cc: Jani Nikula Cc: Alexey Dobriyan Cc: Johannes Weiner Cc: Chris Wilson Cc: Matthew Auld Cc: Huang Ying Link: https://lkml.kernel.org/r/20200910183318.20139-1-willy@infradead.org Link: https://lkml.kernel.org/r/20200910183318.20139-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/swap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4340a7b6e7a1..23c6e43a956d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -427,6 +427,7 @@ extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool do_poll); @@ -570,6 +571,12 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp, return NULL; } +static inline +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) +{ + return find_get_page(mapping, index); +} + static inline int add_to_swap(struct page *page) { return 0; -- cgit v1.2.3 From 9dfc8ff34b951f83632815a87e97a625a11360f0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 16:51:31 -0700 Subject: i915: use find_lock_page instead of find_lock_entry i915 does not want to see value entries. Switch it to use find_lock_page() instead, and remove the export of find_lock_entry(). Move find_lock_entry() and find_get_entry() to mm/internal.h to discourage any future use. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Cc: Alexey Dobriyan Cc: Chris Wilson Cc: Huang Ying Cc: Hugh Dickins Cc: Jani Nikula Cc: Matthew Auld Cc: William Kucharski Link: https://lkml.kernel.org/r/20200910183318.20139-6-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 434c9c34aeb6..9d282fe6700d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -385,8 +385,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } -struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); -struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices); -- cgit v1.2.3 From 63ec1973ddf3eb70feb5728088ca190f1af449cb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 16:51:38 -0700 Subject: mm/shmem: return head page from find_lock_entry Convert shmem_getpage_gfp() (the only remaining caller of find_lock_entry()) to cope with a head page being returned instead of the subpage for the index. [willy@infradead.org: fix BUG()s] Link https://lore.kernel.org/linux-mm/20200912032042.GA6583@casper.infradead.org/ Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Alexey Dobriyan Cc: Chris Wilson Cc: Huang Ying Cc: Hugh Dickins Cc: Jani Nikula Cc: Johannes Weiner Cc: Matthew Auld Cc: William Kucharski Link: https://lkml.kernel.org/r/20200910183318.20139-8-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 9d282fe6700d..5176009e4ffa 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -372,6 +372,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } +/* Does this page contain this index? */ +static inline bool thp_contains(struct page *head, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (PageHuge(head)) + return head->index == index; + return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); +} + /* * Given the page we found in the page cache, return the page corresponding * to this index in the file -- cgit v1.2.3 From a8cf7f272b5a28a62ecfc39d6f7d75b4f486e350 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 16:51:41 -0700 Subject: mm: add find_lock_head Add a new FGP_HEAD flag which avoids calling find_subpage() and add a convenience wrapper for it. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Alexey Dobriyan Cc: Chris Wilson Cc: Huang Ying Cc: Hugh Dickins Cc: Jani Nikula Cc: Johannes Weiner Cc: Matthew Auld Cc: William Kucharski Link: https://lkml.kernel.org/r/20200910183318.20139-9-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 5176009e4ffa..1a3554f5d992 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -279,6 +279,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_NOFS 0x00000010 #define FGP_NOWAIT 0x00000020 #define FGP_FOR_MMAP 0x00000040 +#define FGP_HEAD 0x00000080 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, int fgp_flags, gfp_t cache_gfp_mask); @@ -310,18 +311,37 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, * @mapping: the address_space to search * @offset: the page index * - * Looks up the page cache slot at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @offset. If there is a * page cache page, it is returned locked and with an increased * refcount. * - * Otherwise, %NULL is returned. - * - * find_lock_page() may sleep. + * Context: May sleep. + * Return: A struct page or %NULL if there is no page in the cache for this + * index. */ static inline struct page *find_lock_page(struct address_space *mapping, - pgoff_t offset) + pgoff_t index) +{ + return pagecache_get_page(mapping, index, FGP_LOCK, 0); +} + +/** + * find_lock_head - Locate, pin and lock a pagecache page. + * @mapping: The address_space to search. + * @offset: The page index. + * + * Looks up the page cache entry at @mapping & @offset. If there is a + * page cache page, its head page is returned locked and with an increased + * refcount. + * + * Context: May sleep. + * Return: A struct page which is !PageTail, or %NULL if there is no page + * in the cache for this index. + */ +static inline struct page *find_lock_head(struct address_space *mapping, + pgoff_t index) { - return pagecache_get_page(mapping, offset, FGP_LOCK, 0); + return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0); } /** -- cgit v1.2.3 From eb1d7a65f08a52dfb828bf45b4ead7f617c64047 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 13 Oct 2020 16:51:47 -0700 Subject: mm, fadvise: improve the expensive remote LRU cache draining after FADV_DONTNEED Our users reported that there're some random latency spikes when their RT process is running. Finally we found that latency spike is caused by FADV_DONTNEED. Which may call lru_add_drain_all() to drain LRU cache on remote CPUs, and then waits the per-cpu work to complete. The wait time is uncertain, which may be tens millisecond. That behavior is unreasonable, because this process is bound to a specific CPU and the file is only accessed by itself, IOW, there should be no pagecache pages on a per-cpu pagevec of a remote CPU. That unreasonable behavior is partially caused by the wrong comparation of the number of invalidated pages and the number of the target. For example, if (count < (end_index - start_index + 1)) The count above is how many pages were invalidated in the local CPU, and (end_index - start_index + 1) is how many pages should be invalidated. The usage of (end_index - start_index + 1) is incorrect, because they are virtual addresses, which may not mapped to pages. Besides that, there may be holes between start and end. So we'd better check whether there are still pages on per-cpu pagevec after drain the local cpu, and then decide whether or not to call lru_add_drain_all(). After I applied it with a hotfix to our production environment, most of the lru_add_drain_all() can be avoided. Suggested-by: Mel Gorman Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Acked-by: Mel Gorman Cc: Johannes Weiner Link: https://lkml.kernel.org/r/20200923133318.14373-1-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2e621d28cd65..5815f7d4dbf4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2581,6 +2581,10 @@ extern bool is_bad_inode(struct inode *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); +void invalidate_mapping_pagevec(struct address_space *mapping, + pgoff_t start, pgoff_t end, + unsigned long *nr_pagevec); + static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || -- cgit v1.2.3 From 3264631548b1f2bf89b71793d06bfd0f748f649d Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 13 Oct 2020 16:52:04 -0700 Subject: swap: rename SWP_FS to SWAP_FS_OPS to avoid ambiguity SWP_FS is used to make swap_{read,write}page() go through the filesystem, and it's only used for swap files over NFS for now. Otherwise it will directly submit IO to blockdev according to swapfile extents reported by filesystems in advance. As Matthew pointed out [1], SWP_FS naming is somewhat confusing, so let's rename to SWP_FS_OPS. [1] https://lore.kernel.org/r/20200820113448.GM17456@casper.infradead.org Suggested-by: Matthew Wilcox Signed-off-by: Gao Xiang Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200822113019.11319-1-hsiangkao@redhat.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 23c6e43a956d..7bd5b4aac049 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -170,7 +170,7 @@ enum { SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ - SWP_FS = (1 << 8), /* swap file goes through fs */ + SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ -- cgit v1.2.3 From cc2828b21c764f901128ca2e7b9f056d0e72104f Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 13 Oct 2020 16:52:08 -0700 Subject: mm: remove activate_page() from unuse_pte() We don't initially add anon pages to active lruvec after commit b518154e59aa ("mm/vmscan: protect the workingset on anonymous LRU"). Remove activate_page() from unuse_pte(), which seems to be missed by the commit. And make the function static while we are at it. Before the commit, we called lru_cache_add_active_or_unevictable() to add new ksm pages to active lruvec. Therefore, activate_page() wasn't necessary for them in the first place. Signed-off-by: Yu Zhao Signed-off-by: Andrew Morton Reviewed-by: Yang Shi Cc: Alexander Duyck Cc: Huang Ying Cc: David Hildenbrand Cc: Michal Hocko Cc: Qian Cai Cc: Mel Gorman Cc: Nicholas Piggin Cc: Hugh Dickins Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200818184704.3625199-1-yuzhao@google.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7bd5b4aac049..667935c0dbd4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -340,7 +340,6 @@ extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); -extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); -- cgit v1.2.3 From f3bc52cb04bcfccd12da3f03ca8bc50484898436 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 13 Oct 2020 16:52:18 -0700 Subject: mm/swap_slots.c: remove always zero and unused return value of enable_swap_slots_cache() enable_swap_slots_cache() always return zero and its return value is just ignored by the caller. So make enable_swap_slots_cache() void. Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200924113554.50614-1-linmiaohe@huawei.com Signed-off-by: Linus Torvalds --- include/linux/swap_slots.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h index e36b200c2a77..347f1a304190 100644 --- a/include/linux/swap_slots.h +++ b/include/linux/swap_slots.h @@ -23,7 +23,7 @@ struct swap_slots_cache { void disable_swap_slots_cache_lock(void); void reenable_swap_slots_cache_unlock(void); -int enable_swap_slots_cache(void); +void enable_swap_slots_cache(void); int free_swap_slot(swp_entry_t entry); extern bool swap_slot_cache_enabled; -- cgit v1.2.3 From bd0b230fe14554bfffbae54e19038716f96f5a41 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 13 Oct 2020 16:52:56 -0700 Subject: mm/memcg: unify swap and memsw page counters The swap page counter is v2 only while memsw is v1 only. As v1 and v2 controllers cannot be active at the same time, there is no point to keep both swap and memsw page counters in mem_cgroup. The previous patch has made sure that memsw page counter is updated and accessed only when in v1 code paths. So it is now safe to alias the v1 memsw page counter to v2 swap page counter. This saves 14 long's in the size of mem_cgroup. This is a saving of 112 bytes for 64-bit archs. While at it, also document which page counters are used in v1 and/or v2. Signed-off-by: Waiman Long Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Chris Down Cc: Johannes Weiner Cc: Roman Gushchin Cc: Tejun Heo Cc: Vladimir Davydov Cc: Yafang Shao Link: https://lkml.kernel.org/r/20200914024452.19167-4-longman@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d0b036123c6a..6ef4a552e09d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -215,13 +215,16 @@ struct mem_cgroup { struct mem_cgroup_id id; /* Accounted resources */ - struct page_counter memory; - struct page_counter swap; + struct page_counter memory; /* Both v1 & v2 */ + + union { + struct page_counter swap; /* v2 only */ + struct page_counter memsw; /* v1 only */ + }; /* Legacy consumer-oriented counters */ - struct page_counter memsw; - struct page_counter kmem; - struct page_counter tcpmem; + struct page_counter kmem; /* v1 only */ + struct page_counter tcpmem; /* v1 only */ /* Range enforcement for interrupt charges */ struct work_struct high_work; -- cgit v1.2.3 From b2b29d6d01194404dfef4eafa026959be301705b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 13 Oct 2020 16:53:22 -0700 Subject: mm: account PMD tables like PTE tables We account the PTE level of the page tables to the process in order to make smarter OOM decisions and help diagnose why memory is fragmented. For these same reasons, we should account pages allocated for PMDs. With larger process address spaces and ASLR, the number of PMDs in use is higher than it used to be so the inaccuracy is starting to matter. [rppt@linux.ibm.com: arm: __pmd_free_tlb(): call page table destructor] Link: https://lkml.kernel.org/r/20200825111303.GB69694@linux.ibm.com Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Mike Rapoport Cc: Abdul Haleem Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Christophe Leroy Cc: Joerg Roedel Cc: Max Filippov Cc: Peter Zijlstra Cc: Satheesh Rajendran Cc: Stafford Horne Cc: Naresh Kamboju Cc: Anders Roxell Link: http://lkml.kernel.org/r/20200627184642.GF25039@casper.infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9cc0894e7d61..5320e7ab843f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2254,7 +2254,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(pmd_to_page(pmd)); } -static inline bool pgtable_pmd_page_ctor(struct page *page) +static inline bool pmd_ptlock_init(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE page->pmd_huge_pte = NULL; @@ -2262,7 +2262,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) return ptlock_init(page); } -static inline void pgtable_pmd_page_dtor(struct page *page) +static inline void pmd_ptlock_free(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON_PAGE(page->pmd_huge_pte, page); @@ -2279,8 +2279,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } -static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } -static inline void pgtable_pmd_page_dtor(struct page *page) {} +static inline bool pmd_ptlock_init(struct page *page) { return true; } +static inline void pmd_ptlock_free(struct page *page) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -2293,6 +2293,22 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } +static inline bool pgtable_pmd_page_ctor(struct page *page) +{ + if (!pmd_ptlock_init(page)) + return false; + __SetPageTable(page); + inc_zone_page_state(page, NR_PAGETABLE); + return true; +} + +static inline void pgtable_pmd_page_dtor(struct page *page) +{ + pmd_ptlock_free(page); + __ClearPageTable(page); + dec_zone_page_state(page, NR_PAGETABLE); +} + /* * No scalability reason to split PUD locks yet, but follow the same pattern * as the PMD locks to make it easier if we decide to. The VM should not be -- cgit v1.2.3 From 07e5bfe651f8595ca6a777d989016aa8d8217924 Mon Sep 17 00:00:00 2001 From: Chinwen Chang Date: Tue, 13 Oct 2020 16:53:39 -0700 Subject: mmap locking API: add mmap_lock_is_contended() Patch series "Try to release mmap_lock temporarily in smaps_rollup", v4. Recently, we have observed some janky issues caused by unpleasantly long contention on mmap_lock which is held by smaps_rollup when probing large processes. To address the problem, we let smaps_rollup detect if anyone wants to acquire mmap_lock for write attempts. If yes, just release the lock temporarily to ease the contention. smaps_rollup is a procfs interface which allows users to summarize the process's memory usage without the overhead of seq_* calls. Android uses it to sample the memory usage of various processes to balance its memory pool sizes. If no one wants to take the lock for write requests, smaps_rollup with this patch will behave like the original one. Although there are on-going mmap_lock optimizations like range-based locks, the lock applied to smaps_rollup would be the coarse one, which is hard to avoid the occurrence of aforementioned issues. So the detection and temporary release for write attempts on mmap_lock in smaps_rollup is still necessary. This patch (of 3): Add new API to query if someone wants to acquire mmap_lock for write attempts. Using this instead of rwsem_is_contended makes it more tolerant of future changes to the lock type. Signed-off-by: Chinwen Chang Signed-off-by: Andrew Morton Reviewed-by: Steven Price Acked-by: Michel Lespinasse Cc: Alexey Dobriyan Cc: Daniel Jordan Cc: Daniel Kiss Cc: Davidlohr Bueso Cc: Huang Ying Cc: Jason Gunthorpe Cc: Jimmy Assarsson Cc: Laurent Dufour Cc: "Matthew Wilcox (Oracle)" Cc: Matthias Brugger Cc: Song Liu Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/1597715898-3854-1-git-send-email-chinwen.chang@mediatek.com Link: http://lkml.kernel.org/r/1597715898-3854-2-git-send-email-chinwen.chang@mediatek.com Signed-off-by: Linus Torvalds --- include/linux/mmap_lock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 0707671851a8..18e7eae9b5ba 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -87,4 +87,9 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm) VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } +static inline int mmap_lock_is_contended(struct mm_struct *mm) +{ + return rwsem_is_contended(&mm->mmap_lock); +} + #endif /* _LINUX_MMAP_LOCK_H */ -- cgit v1.2.3 From e18c45ffcfa347b13c2f300f290bacff55a4b41e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 16:53:50 -0700 Subject: mm: move PageDoubleMap bit Patch series "Fix PageDoubleMap". This is a purely theoretical problem for now as none of the filesystems which use PG_private_2 (ie PG_fscache) are being converted at this time, but it's confusing to leave it like this. This patch (of 2): PG_private_2 is defined as being PF_ANY (applicable to tail pages as well as regular & head pages). That means that the first tail page of a double-map page will appear to have Private2 set. Use the Workingset bit instead which is defined as PF_HEAD so any attempt to access the Workingset bit on a tail page will redirect to the head page's Workingset bit. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Zi Yan Link: https://lkml.kernel.org/r/20200629151933.15671-1-willy@infradead.org Link: https://lkml.kernel.org/r/20200629151933.15671-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 276140c94f4a..76413b2ffef0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -167,7 +167,7 @@ enum pageflags { PG_slob_free = PG_private, /* Compound pages. Stored in first tail page's flags */ - PG_double_map = PG_private_2, + PG_double_map = PG_workingset, /* non-lru isolated movable page */ PG_isolated = PG_reclaim, -- cgit v1.2.3 From a08d93e5752a35a771054f6c463f789720f9a3e8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 16:53:54 -0700 Subject: mm: simplify PageDoubleMap with PF_SECOND policy Introduce the new page policy of PF_SECOND which lets us use the normal pageflags generation machinery to create the various DoubleMap manipulation functions. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Zi Yan Link: https://lkml.kernel.org/r/20200629151933.15671-3-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 76413b2ffef0..38ded408bd4c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -235,6 +235,9 @@ static inline void page_init_poison(struct page *page, size_t size) * * PF_NO_COMPOUND: * the page flag is not relevant for compound pages. + * + * PF_SECOND: + * the page flag is stored in the first tail page. */ #define PF_POISONED_CHECK(page) ({ \ VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ @@ -250,6 +253,9 @@ static inline void page_init_poison(struct page *page, size_t size) #define PF_NO_COMPOUND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ PF_POISONED_CHECK(page); }) +#define PF_SECOND(page, enforce) ({ \ + VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ + PF_POISONED_CHECK(&page[1]); }) /* * Macros to create function definitions for page flags @@ -688,42 +694,15 @@ static inline int PageTransTail(struct page *page) * * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap(). */ -static inline int PageDoubleMap(struct page *page) -{ - return PageHead(page) && test_bit(PG_double_map, &page[1].flags); -} - -static inline void SetPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - set_bit(PG_double_map, &page[1].flags); -} - -static inline void ClearPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - clear_bit(PG_double_map, &page[1].flags); -} -static inline int TestSetPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - return test_and_set_bit(PG_double_map, &page[1].flags); -} - -static inline int TestClearPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - return test_and_clear_bit(PG_double_map, &page[1].flags); -} - +PAGEFLAG(DoubleMap, double_map, PF_SECOND) + TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else TESTPAGEFLAG_FALSE(TransHuge) TESTPAGEFLAG_FALSE(TransCompound) TESTPAGEFLAG_FALSE(TransCompoundMap) TESTPAGEFLAG_FALSE(TransTail) PAGEFLAG_FALSE(DoubleMap) - TESTSETFLAG_FALSE(DoubleMap) - TESTCLEARFLAG_FALSE(DoubleMap) + TESTSCFLAG_FALSE(DoubleMap) #endif /* @@ -888,6 +867,7 @@ static inline int page_has_private(struct page *page) #undef PF_ONLY_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND +#undef PF_SECOND #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ -- cgit v1.2.3 From c78f463649d60f4ed12df97a32def4d77b583853 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 13 Oct 2020 16:54:21 -0700 Subject: mm: remove src/dst mm parameter in copy_page_range() Both of the mm pointers are not needed after commit 7a4830c380f3 ("mm/fork: Pass new vma pointer into copy_page_range()"). Jason Gunthorpe also reported that the ordering of copy_page_range() is odd. Since working at it, reorder the parameters to be logical, by (1) always put the dst_* fields to be before src_* fields, and (2) keep the same type of parameters together. [peterx@redhat.com: further reorder some parameters and line format, per Jason] Link: https://lkml.kernel.org/r/20201002192647.7161-1-peterx@redhat.com [peterx@redhat.com: fix warnings] Link: https://lkml.kernel.org/r/20201006200138.GA6026@xz-x1 Reported-by: Kirill A. Shutemov Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jason Gunthorpe Acked-by: Kirill A. Shutemov Link: https://lkml.kernel.org/r/20200930204950.6668-1-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5320e7ab843f..620961e4f32b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1653,8 +1653,8 @@ struct mmu_notifier_range; void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); -int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma, struct vm_area_struct *new); +int +copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); -- cgit v1.2.3 From f577e143d85aa7ea3d9c62c607ad00fc46a5730c Mon Sep 17 00:00:00 2001 From: yuleixzhang Date: Tue, 13 Oct 2020 16:54:25 -0700 Subject: include/linux/huge_mm.h: remove mincore_huge_pmd declaration As mincore_huge_pmd() was dropped, remove the declaration from the header file. Signed-off-by: Yulei Zhang Signed-off-by: Andrew Morton Reviewed-by: Zi Yan Link: https://lkml.kernel.org/r/20200922083423.15074-1-yuleixzhang@tencent.com Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 8a8bc46a2432..0365aa97f8e7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -38,9 +38,6 @@ extern int zap_huge_pmd(struct mmu_gather *tlb, extern int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr); -extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd); -- cgit v1.2.3 From 393824f650fabf6ea32bb09bea7acbc3a062dac8 Mon Sep 17 00:00:00 2001 From: Patricia Alfonso Date: Tue, 13 Oct 2020 16:54:58 -0700 Subject: kasan/kunit: add KUnit Struct to Current Task Patch series "KASAN-KUnit Integration", v14. This patchset contains everything needed to integrate KASAN and KUnit. KUnit will be able to: (1) Fail tests when an unexpected KASAN error occurs (2) Pass tests when an expected KASAN error occurs Convert KASAN tests to KUnit with the exception of copy_user_test because KUnit is unable to test those. Add documentation on how to run the KASAN tests with KUnit and what to expect when running these tests. This patch (of 5): In order to integrate debugging tools like KASAN into the KUnit framework, add KUnit struct to the current task to keep track of the current KUnit test. Signed-off-by: Patricia Alfonso Signed-off-by: David Gow Signed-off-by: Andrew Morton Tested-by: Andrey Konovalov Reviewed-by: Brendan Higgins Cc: Brendan Higgins Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Shuah Khan Link: https://lkml.kernel.org/r/20200915035828.570483-1-davidgow@google.com Link: https://lkml.kernel.org/r/20200915035828.570483-2-davidgow@google.com Link: https://lkml.kernel.org/r/20200910070331.3358048-1-davidgow@google.com Link: https://lkml.kernel.org/r/20200910070331.3358048-2-davidgow@google.com Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 829b0697d19c..9030f3abd969 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1208,6 +1208,10 @@ struct task_struct { #endif #endif +#if IS_ENABLED(CONFIG_KUNIT) + struct kunit *kunit_test; +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ int curr_ret_stack; -- cgit v1.2.3 From 83c4e7a0363bdb8104f510370907161623e31086 Mon Sep 17 00:00:00 2001 From: Patricia Alfonso Date: Tue, 13 Oct 2020 16:55:02 -0700 Subject: KUnit: KASAN Integration Integrate KASAN into KUnit testing framework. - Fail tests when KASAN reports an error that is not expected - Use KUNIT_EXPECT_KASAN_FAIL to expect a KASAN error in KASAN tests - Expected KASAN reports pass tests and are still printed when run without kunit_tool (kunit_tool still bypasses the report due to the test passing) - KUnit struct in current task used to keep track of the current test from KASAN code Make use of "[PATCH v3 kunit-next 1/2] kunit: generalize kunit_resource API beyond allocated resources" and "[PATCH v3 kunit-next 2/2] kunit: add support for named resources" from Alan Maguire [1] - A named resource is added to a test when a KASAN report is expected - This resource contains a struct for kasan_data containing booleans representing if a KASAN report is expected and if a KASAN report is found [1] (https://lore.kernel.org/linux-kselftest/1583251361-12748-1-git-send-email-alan.maguire@oracle.com/T/#t) Signed-off-by: Patricia Alfonso Signed-off-by: David Gow Signed-off-by: Andrew Morton Tested-by: Andrey Konovalov Reviewed-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Acked-by: Brendan Higgins Cc: Andrey Ryabinin Cc: Ingo Molnar Cc: Juri Lelli Cc: Peter Zijlstra Cc: Shuah Khan Cc: Vincent Guittot Link: https://lkml.kernel.org/r/20200915035828.570483-3-davidgow@google.com Link: https://lkml.kernel.org/r/20200910070331.3358048-3-davidgow@google.com Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 087fba34b209..30d343b4a40a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -14,6 +14,12 @@ struct task_struct; #include #include +/* kasan_data struct is used in KUnit tests for KASAN expected failures */ +struct kunit_kasan_expectation { + bool report_expected; + bool report_found; +}; + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; -- cgit v1.2.3 From 9181a980625a45425085ccec0fc38074a16470a5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 13 Oct 2020 16:55:35 -0700 Subject: mm: document semantics of ZONE_MOVABLE Let's document what ZONE_MOVABLE means, how it's used, and which special cases we have regarding unmovable pages (memory offlining vs. migration / allocations). Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Acked-by: Mike Rapoport Cc: Michal Hocko Cc: Michael S. Tsirkin Cc: Mike Kravetz Cc: Pankaj Gupta Cc: Baoquan He Cc: Jason Wang Cc: Qian Cai Link: http://lkml.kernel.org/r/20200816125333.7434-7-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0f7a4ff4b059..927bd7e98a88 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -396,6 +396,41 @@ enum zone_type { */ ZONE_HIGHMEM, #endif + /* + * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains + * movable pages with few exceptional cases described below. Main use + * cases for ZONE_MOVABLE are to make memory offlining/unplug more + * likely to succeed, and to locally limit unmovable allocations - e.g., + * to increase the number of THP/huge pages. Notable special cases are: + * + * 1. Pinned pages: (long-term) pinning of movable pages might + * essentially turn such pages unmovable. Memory offlining might + * retry a long time. + * 2. memblock allocations: kernelcore/movablecore setups might create + * situations where ZONE_MOVABLE contains unmovable allocations + * after boot. Memory offlining and allocations fail early. + * 3. Memory holes: kernelcore/movablecore setups might create very rare + * situations where ZONE_MOVABLE contains memory holes after boot, + * for example, if we have sections that are only partially + * populated. Memory offlining and allocations fail early. + * 4. PG_hwpoison pages: while poisoned pages can be skipped during + * memory offlining, such pages cannot be allocated. + * 5. Unmovable PG_offline pages: in paravirtualized environments, + * hotplugged memory blocks might only partially be managed by the + * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The + * parts not manged by the buddy are unmovable PG_offline pages. In + * some cases (virtio-mem), such pages can be skipped during + * memory offlining, however, cannot be moved/allocated. These + * techniques might use alloc_contig_range() to hide previously + * exposed pages from the buddy again (e.g., to implement some sort + * of memory unplug in virtio-mem). + * + * In general, no unmovable allocations that degrade memory offlining + * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) + * have to expect that migrating pages in ZONE_MOVABLE can fail (even + * if has_unmovable_pages() states that there are no unmovable pages, + * there can be false negatives). + */ ZONE_MOVABLE, #ifdef CONFIG_ZONE_DEVICE ZONE_DEVICE, -- cgit v1.2.3 From 30d8ec73e8772b32a7eae626d14004bd37d8f13c Mon Sep 17 00:00:00 2001 From: Mateusz Nosek Date: Tue, 13 Oct 2020 16:55:57 -0700 Subject: mmzone: clean code by removing unused macro parameter Previously 'for_next_zone_zonelist_nodemask' macro parameter 'zlist' was unused so this patch removes it. Signed-off-by: Mateusz Nosek Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200917211906.30059-1-mateusznosek0@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 927bd7e98a88..c27fb1faffe5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1116,7 +1116,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) -#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ +#define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ for (zone = z->zone; \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ -- cgit v1.2.3 From ab00db216c9c78cc0a68bc4e27889c1ee374598d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 13 Oct 2020 16:56:07 -0700 Subject: include/linux/gfp.h: clarify usage of GFP_ATOMIC in !preemptible contexts There is a general understanding that GFP_ATOMIC/GFP_NOWAIT are to be used from atomic contexts. E.g. from within a spin lock or from the IRQ context. This is correct but there are some atomic contexts where the above doesn't hold. One of them would be an NMI context. Page allocator has never supported that and the general fear of this context didn't let anybody to actually even try to use the allocator there. Good, but let's be more specific about that. Another such a context, and that is where people seem to be more daring, is raw_spin_lock. Mostly because it simply resembles regular spin lock which is supported by the allocator and there is not any implementation difference with !RT kernels in the first place. Be explicit that such a context is not supported by the allocator. The underlying reason is that zone->lock would have to become raw_spin_lock as well and that has turned out to be a problem for RT (http://lkml.kernel.org/r/87mu305c1w.fsf@nanos.tec.linutronix.de). Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Thomas Gleixner Reviewed-by: Uladzislau Rezki Cc: "Paul E. McKenney" Link: https://lkml.kernel.org/r/20200929123010.5137-1-mhocko@kernel.org Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 67a0774e080b..2e8370cf60c7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -238,7 +238,9 @@ struct vm_area_struct; * %__GFP_FOO flags as necessary. * * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower - * watermark is applied to allow access to "atomic reserves" + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI and few other strict + * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. * * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. -- cgit v1.2.3 From 74c9da4e1dc0ecf70e7fa78568821e3ed8f77938 Mon Sep 17 00:00:00 2001 From: Mateusz Nosek Date: Tue, 13 Oct 2020 16:57:01 -0700 Subject: include/linux/compaction.h: clean code by removing unused enum value The enum value 'COMPACT_INACTIVE' is never used so can be removed. Signed-off-by: Mateusz Nosek Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200917110750.12015-1-mateusznosek0@gmail.com Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 25a521d299c1..1de5a1151ee7 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -29,9 +29,6 @@ enum compact_result { /* compaction didn't start as it was deferred due to past failures */ COMPACT_DEFERRED, - /* compaction not active last round */ - COMPACT_INACTIVE = COMPACT_DEFERRED, - /* For more detailed tracepoint output - internal to compaction */ COMPACT_NO_SUITABLE_PAGE, /* compaction should continue to another pageblock */ -- cgit v1.2.3 From f8fd52535c7326d72645c9878d7897aaf44db51c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 13 Oct 2020 16:57:11 -0700 Subject: mm: remove unused alloc_page_vma_node() No one use this macro anymore. Also fix code style of policy_node(). Signed-off-by: Wei Yang Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: https://lkml.kernel.org/r/20200921021401.84508-1-richard.weiyang@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2e8370cf60c7..07e481993ef5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -562,8 +562,6 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) -#define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -- cgit v1.2.3 From cd991db8ddc33d2d2c7af45627fc48352915001c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 13 Oct 2020 16:57:49 -0700 Subject: memblock: make for_each_memblock_type() iterator private for_each_memblock_type() is not used outside mm/memblock.c, move it there from include/linux/memblock.h Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Reviewed-by: Baoquan He Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Dave Hansen Cc: Emil Renner Berthing Cc: Hari Bathini Cc: Ingo Molnar Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Marek Szyprowski Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Miguel Ojeda Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: https://lkml.kernel.org/r/20200818151634.14343-9-rppt@kernel.org Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9d925db0d355..550faf69fc1c 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -552,11 +552,6 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ region++) -#define for_each_memblock_type(i, memblock_type, rgn) \ - for (i = 0, rgn = &memblock_type->regions[0]; \ - i < memblock_type->cnt; \ - i++, rgn = &memblock_type->regions[i]) - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, -- cgit v1.2.3 From 87c55870f01266fe22f345a0767162f85f1cf8f1 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 13 Oct 2020 16:57:54 -0700 Subject: memblock: make memblock_debug and related functionality private The only user of memblock_dbg() outside memblock was s390 setup code and it is converted to use pr_debug() instead. This allows to stop exposing memblock_debug and memblock_dbg() to the rest of the kernel. [akpm@linux-foundation.org: make memblock_dbg() safer and neater] Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Reviewed-by: Baoquan He Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Dave Hansen Cc: Emil Renner Berthing Cc: Hari Bathini Cc: Ingo Molnar Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Marek Szyprowski Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Miguel Ojeda Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: https://lkml.kernel.org/r/20200818151634.14343-10-rppt@kernel.org Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 550faf69fc1c..47a76e237fca 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -86,7 +86,6 @@ struct memblock { }; extern struct memblock memblock; -extern int memblock_debug; #ifndef CONFIG_ARCH_KEEP_MEMBLOCK #define __init_memblock __meminit @@ -98,9 +97,6 @@ void memblock_discard(void); static inline void memblock_discard(void) {} #endif -#define memblock_dbg(fmt, ...) \ - if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) - phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); void memblock_allow_resize(void); @@ -476,13 +472,7 @@ bool memblock_is_region_memory(phys_addr_t base, phys_addr_t size); bool memblock_is_reserved(phys_addr_t addr); bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); -extern void __memblock_dump_all(void); - -static inline void memblock_dump_all(void) -{ - if (memblock_debug) - __memblock_dump_all(); -} +void memblock_dump_all(void); /** * memblock_set_current_limit - Set the current allocation limit to allow -- cgit v1.2.3 From 6e245ad4a17ab92dba63406d3f517520a86c0a80 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 13 Oct 2020 16:57:59 -0700 Subject: memblock: reduce number of parameters in for_each_mem_range() Currently for_each_mem_range() and for_each_mem_range_rev() iterators are the most generic way to traverse memblock regions. As such, they have 8 parameters and they are hardly convenient to users. Most users choose to utilize one of their wrappers and the only user that actually needs most of the parameters is memblock itself. To avoid yet another naming for memblock iterators, rename the existing for_each_mem_range[_rev]() to __for_each_mem_range[_rev]() and add a new for_each_mem_range[_rev]() wrappers with only index, start and end parameters. The new wrapper nicely fits into init_unavailable_mem() and will be used in upcoming changes to simplify memblock traversals. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Acked-by: Thomas Bogendoerfer [MIPS] Cc: Andy Lutomirski Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Dave Hansen Cc: Emil Renner Berthing Cc: Hari Bathini Cc: Ingo Molnar Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Marek Szyprowski Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Miguel Ojeda Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Russell King Cc: Stafford Horne Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: https://lkml.kernel.org/r/20200818151634.14343-11-rppt@kernel.org Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 47a76e237fca..27c3b84d1615 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -162,7 +162,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, #endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */ /** - * for_each_mem_range - iterate through memblock areas from type_a and not + * __for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. * @i: u64 used as loop variable * @type_a: ptr to memblock_type to iterate @@ -173,7 +173,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range(i, type_a, type_b, nid, flags, \ +#define __for_each_mem_range(i, type_a, type_b, nid, flags, \ p_start, p_end, p_nid) \ for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid); \ @@ -182,7 +182,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, p_start, p_end, p_nid)) /** - * for_each_mem_range_rev - reverse iterate through memblock areas from + * __for_each_mem_range_rev - reverse iterate through memblock areas from * type_a and not included in type_b. Or just type_a if type_b is NULL. * @i: u64 used as loop variable * @type_a: ptr to memblock_type to iterate @@ -193,15 +193,36 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range_rev(i, type_a, type_b, nid, flags, \ - p_start, p_end, p_nid) \ +#define __for_each_mem_range_rev(i, type_a, type_b, nid, flags, \ + p_start, p_end, p_nid) \ for (i = (u64)ULLONG_MAX, \ - __next_mem_range_rev(&i, nid, flags, type_a, type_b,\ + __next_mem_range_rev(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid); \ i != (u64)ULLONG_MAX; \ __next_mem_range_rev(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid)) +/** + * for_each_mem_range - iterate through memory areas. + * @i: u64 used as loop variable + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + */ +#define for_each_mem_range(i, p_start, p_end) \ + __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ + MEMBLOCK_NONE, p_start, p_end, NULL) + +/** + * for_each_mem_range_rev - reverse iterate through memblock areas from + * type_a and not included in type_b. Or just type_a if type_b is NULL. + * @i: u64 used as loop variable + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + */ +#define for_each_mem_range_rev(i, p_start, p_end) \ + __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ + MEMBLOCK_NONE, p_start, p_end, NULL) + /** * for_each_reserved_mem_region - iterate over all reserved memblock areas * @i: u64 used as loop variable @@ -307,8 +328,8 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); * soon as memblock is initialized. */ #define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid) \ - for_each_mem_range(i, &memblock.memory, &memblock.reserved, \ - nid, flags, p_start, p_end, p_nid) + __for_each_mem_range(i, &memblock.memory, &memblock.reserved, \ + nid, flags, p_start, p_end, p_nid) /** * for_each_free_mem_range_reverse - rev-iterate through free memblock areas @@ -324,8 +345,8 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); */ #define for_each_free_mem_range_reverse(i, nid, flags, p_start, p_end, \ p_nid) \ - for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ - nid, flags, p_start, p_end, p_nid) + __for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ + nid, flags, p_start, p_end, p_nid) int memblock_set_node(phys_addr_t base, phys_addr_t size, struct memblock_type *type, int nid); -- cgit v1.2.3 From 5bd0960b85d7e3e4a2dc5bbf1c87d0b505115d71 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 13 Oct 2020 16:58:20 -0700 Subject: memblock: remove unused memblock_mem_size() The only user of memblock_mem_size() was x86 setup code, it is gone now and memblock_mem_size() funciton can be removed. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Reviewed-by: Baoquan He Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Dave Hansen Cc: Emil Renner Berthing Cc: Hari Bathini Cc: Ingo Molnar Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Marek Szyprowski Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Miguel Ojeda Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: https://lkml.kernel.org/r/20200818151634.14343-16-rppt@kernel.org Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 27c3b84d1615..15ed119701c1 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -481,7 +481,6 @@ static inline bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); -phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); -- cgit v1.2.3 From 9f3d5eaa3c60f95d9fff1ce4eea7553a3dc04906 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 13 Oct 2020 16:58:25 -0700 Subject: memblock: implement for_each_reserved_mem_region() using __next_mem_region() Iteration over memblock.reserved with for_each_reserved_mem_region() used __next_reserved_mem_region() that implemented a subset of __next_mem_region(). Use __for_each_mem_range() and, essentially, __next_mem_region() with appropriate parameters to reduce code duplication. While on it, rename for_each_reserved_mem_region() to for_each_reserved_mem_range() for consistency. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Acked-by: Miguel Ojeda [.clang-format] Cc: Andy Lutomirski Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Dave Hansen Cc: Emil Renner Berthing Cc: Hari Bathini Cc: Ingo Molnar Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Marek Szyprowski Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: https://lkml.kernel.org/r/20200818151634.14343-17-rppt@kernel.org Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 15ed119701c1..354078713cd1 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -132,9 +132,6 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); -void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, - phys_addr_t *out_end); - void __memblock_free_late(phys_addr_t base, phys_addr_t size); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP @@ -224,7 +221,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, MEMBLOCK_NONE, p_start, p_end, NULL) /** - * for_each_reserved_mem_region - iterate over all reserved memblock areas + * for_each_reserved_mem_range - iterate over all reserved memblock areas * @i: u64 used as loop variable * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL @@ -232,10 +229,9 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * Walks over reserved areas of memblock. Available as soon as memblock * is initialized. */ -#define for_each_reserved_mem_region(i, p_start, p_end) \ - for (i = 0UL, __next_reserved_mem_region(&i, p_start, p_end); \ - i != (u64)ULLONG_MAX; \ - __next_reserved_mem_region(&i, p_start, p_end)) +#define for_each_reserved_mem_range(i, p_start, p_end) \ + __for_each_mem_range(i, &memblock.reserved, NULL, NUMA_NO_NODE, \ + MEMBLOCK_NONE, p_start, p_end, NULL) static inline bool memblock_is_hotpluggable(struct memblock_region *m) { -- cgit v1.2.3 From cc6de1680538633e4ef9540b2313fa2481a7c641 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 13 Oct 2020 16:58:30 -0700 Subject: memblock: use separate iterators for memory and reserved regions for_each_memblock() is used to iterate over memblock.memory in a few places that use data from memblock_region rather than the memory ranges. Introduce separate for_each_mem_region() and for_each_reserved_mem_region() to improve encapsulation of memblock internals from its users. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Reviewed-by: Baoquan He Acked-by: Ingo Molnar [x86] Acked-by: Thomas Bogendoerfer [MIPS] Acked-by: Miguel Ojeda [.clang-format] Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Dave Hansen Cc: Emil Renner Berthing Cc: Hari Bathini Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Marek Szyprowski Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Russell King Cc: Stafford Horne Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: https://lkml.kernel.org/r/20200818151634.14343-18-rppt@kernel.org Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 354078713cd1..ef131255cedc 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -553,9 +553,22 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo return PFN_UP(reg->base + reg->size); } -#define for_each_memblock(memblock_type, region) \ - for (region = memblock.memblock_type.regions; \ - region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ +/** + * for_each_mem_region - itereate over memory regions + * @region: loop variable + */ +#define for_each_mem_region(region) \ + for (region = memblock.memory.regions; \ + region < (memblock.memory.regions + memblock.memory.cnt); \ + region++) + +/** + * for_each_reserved_mem_region - itereate over reserved memory regions + * @region: loop variable + */ +#define for_each_reserved_mem_region(region) \ + for (region = memblock.reserved.regions; \ + region < (memblock.reserved.regions + memblock.reserved.cnt); \ region++) extern void *alloc_large_system_hash(const char *tablename, -- cgit v1.2.3 From 67197a4f28d28d0b073ab0427b03cb2ee5382578 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 13 Oct 2020 16:58:35 -0700 Subject: mm, oom_adj: don't loop through tasks in __set_oom_adj when not necessary Currently __set_oom_adj loops through all processes in the system to keep oom_score_adj and oom_score_adj_min in sync between processes sharing their mm. This is done for any task with more that one mm_users, which includes processes with multiple threads (sharing mm and signals). However for such processes the loop is unnecessary because their signal structure is shared as well. Android updates oom_score_adj whenever a tasks changes its role (background/foreground/...) or binds to/unbinds from a service, making it more/less important. Such operation can happen frequently. We noticed that updates to oom_score_adj became more expensive and after further investigation found out that the patch mentioned in "Fixes" introduced a regression. Using Pixel 4 with a typical Android workload, write time to oom_score_adj increased from ~3.57us to ~362us. Moreover this regression linearly depends on the number of multi-threaded processes running on the system. Mark the mm with a new MMF_MULTIPROCESS flag bit when task is created with (CLONE_VM && !CLONE_THREAD && !CLONE_VFORK). Change __set_oom_adj to use MMF_MULTIPROCESS instead of mm_users to decide whether oom_score_adj update should be synchronized between multiple processes. To prevent races between clone() and __set_oom_adj(), when oom_score_adj of the process being cloned might be modified from userspace, we use oom_adj_mutex. Its scope is changed to global. The combination of (CLONE_VM && !CLONE_THREAD) is rarely used except for the case of vfork(). To prevent performance regressions of vfork(), we skip taking oom_adj_mutex and setting MMF_MULTIPROCESS when CLONE_VFORK is specified. Clearing the MMF_MULTIPROCESS flag (when the last process sharing the mm exits) is left out of this patch to keep it simple and because it is believed that this threading model is rare. Should there ever be a need for optimizing that case as well, it can be done by hooking into the exit path, likely following the mm_update_next_owner pattern. With the combination of (CLONE_VM && !CLONE_THREAD && !CLONE_VFORK) being quite rare, the regression is gone after the change is applied. [surenb@google.com: v3] Link: https://lkml.kernel.org/r/20200902012558.2335613-1-surenb@google.com Fixes: 44a70adec910 ("mm, oom_adj: make sure processes sharing mm have same view of oom_score_adj") Reported-by: Tim Murray Suggested-by: Michal Hocko Signed-off-by: Suren Baghdasaryan Signed-off-by: Andrew Morton Acked-by: Christian Brauner Acked-by: Michal Hocko Acked-by: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Eugene Syromiatnikov Cc: Christian Kellner Cc: Adrian Reber Cc: Shakeel Butt Cc: Aleksa Sarai Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Cc: Alexey Gladkov Cc: Michel Lespinasse Cc: Daniel Jordan Cc: Andrei Vagin Cc: Bernd Edlinger Cc: John Johansen Cc: Yafang Shao Link: https://lkml.kernel.org/r/20200824153036.3201505-1-surenb@google.com Debugged-by: Minchan Kim Signed-off-by: Linus Torvalds --- include/linux/oom.h | 1 + include/linux/sched/coredump.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index f022f581ac29..2db9a1432511 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -55,6 +55,7 @@ struct oom_control { }; extern struct mutex oom_lock; +extern struct mutex oom_adj_mutex; static inline void set_current_oom_origin(void) { diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ecdc6542070f..dfd82eab2902 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -72,6 +72,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ +#define MMF_MULTIPROCESS 27 /* mm is shared between processes */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ -- cgit v1.2.3 From ed3e453798d4f81c99056aa09fcd79d0874a60fd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 13 Oct 2020 14:14:43 +0200 Subject: locking/seqlocks: Fix kernel-doc warnings Right now, seqlock.h produces kernel-doc warnings: ./include/linux/seqlock.h:181: error: Cannot parse typedef! Convert it to a plain comment to avoid confusing kernel-doc. Fixes: a8772dccb2ec ("seqlock: Fold seqcount_LOCKNAME_t definition") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/a59144cdaadf7fdf1fe5d55d0e1575abbf1c0cb3.1602590106.git.mchehab+huawei@kernel.org --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index ac5b07f558b0..cbfc78b92b65 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -154,7 +154,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define __SEQ_LOCK(expr) #endif -/** +/* * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated * @seqcount: The real sequence counter * @lock: Pointer to the associated lock -- cgit v1.2.3 From 02e83f46ebfaf9405881e290794c913d457541f0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Oct 2020 16:47:08 -0700 Subject: vfs: move generic_remap_checks out of mm I would like to move all the generic helpers for the vfs remap range functionality (aka clonerange and dedupe) into a separate file so that they won't be scattered across the vfs and the mm subsystems. The eventual goal is to be able to deselect remap_range.c if none of the filesystems need that code, but the tricky part here is picking a stable(ish) part of the merge window to rearrange code. Signed-off-by: Darrick J. Wong --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..eea754a8dd67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3012,6 +3012,8 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); +extern int generic_write_check_limits(struct file *file, loff_t pos, + loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, -- cgit v1.2.3 From c2a9a645591f11761fdbe3c7f45c0ad32af97eb4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 28 Sep 2020 15:25:17 +0200 Subject: math64.h: kernel-docs: Convert some markups into normal comments There are several functions at math64.h that are also defined at div64.c. As both are included at kernel-api.rst, Sphinx 3.x complains about symbol duplication: ./lib/math/div64.c:73: WARNING: Duplicate C declaration, also defined in 'core-api/kernel-api'. Declaration is 'div_s64_rem'. ./lib/math/div64.c:104: WARNING: Duplicate C declaration, also defined in 'core-api/kernel-api'. Declaration is 'div64_u64_rem'. ./lib/math/div64.c:144: WARNING: Duplicate C declaration, also defined in 'core-api/kernel-api'. Declaration is 'div64_u64'. ./lib/math/div64.c:172: WARNING: Duplicate C declaration, also defined in 'core-api/kernel-api'. Declaration is 'div64_s64'. In order to avoid Sphinx warnings about duplication, change the kernel-doc markups to just comments at math64.h. Reviewed-by: Vincenzo Frascino Signed-off-by: Mauro Carvalho Chehab --- include/linux/math64.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index 3381d9e33c4e..66deb1fdc2ef 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -28,7 +28,7 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) return dividend / divisor; } -/** +/* * div_s64_rem - signed 64bit divide with 32bit divisor with remainder * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor @@ -42,7 +42,7 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) return dividend / divisor; } -/** +/* * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor @@ -56,7 +56,7 @@ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) return dividend / divisor; } -/** +/* * div64_u64 - unsigned 64bit divide with 64bit divisor * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor @@ -68,7 +68,7 @@ static inline u64 div64_u64(u64 dividend, u64 divisor) return dividend / divisor; } -/** +/* * div64_s64 - signed 64bit divide with 64bit divisor * @dividend: signed 64bit dividend * @divisor: signed 64bit divisor -- cgit v1.2.3 From 1b7743912bcf2b4a9d9c96de668542156450554f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 10 Sep 2020 09:25:14 +0200 Subject: usb: docs: document altmode register/unregister functions The typec_bus.rst asks for documentation of those two functions, but they don't exist: ./drivers/usb/typec/bus.c:1: warning: 'typec_altmode_unregister_driver' not found ./drivers/usb/typec/bus.c:1: warning: 'typec_altmode_register_driver' not found Also, they're not declared on bus.c but, instead, at a header file (typec_altmode.h). So, add documentation for both functions at the header and change the kernel-doc markup under typec_bus.rst to point to the right place. While here, also place the documentation for both structs declared on typec_altmode.h at the rst file. Signed-off-by: Mauro Carvalho Chehab --- include/linux/usb/typec_altmode.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h index a4b65eaa0f62..5e0a7b7647c3 100644 --- a/include/linux/usb/typec_altmode.h +++ b/include/linux/usb/typec_altmode.h @@ -152,10 +152,26 @@ struct typec_altmode_driver { #define to_altmode_driver(d) container_of(d, struct typec_altmode_driver, \ driver) +/** + * typec_altmode_register_driver - registers a USB Type-C alternate mode + * device driver + * @drv: pointer to struct typec_altmode_driver + * + * These drivers will be bind to the partner alternate mode devices. They will + * handle all SVID specific communication. + */ #define typec_altmode_register_driver(drv) \ __typec_altmode_register_driver(drv, THIS_MODULE) int __typec_altmode_register_driver(struct typec_altmode_driver *drv, struct module *module); +/** + * typec_altmode_unregister_driver - unregisters a USB Type-C alternate mode + * device driver + * @drv: pointer to struct typec_altmode_driver + * + * These drivers will be bind to the partner alternate mode devices. They will + * handle all SVID specific communication. + */ void typec_altmode_unregister_driver(struct typec_altmode_driver *drv); #define module_typec_altmode_driver(__typec_altmode_driver) \ -- cgit v1.2.3 From f2c6855159228861f4552d563c13f5b194b9889f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 10 Sep 2020 10:27:17 +0200 Subject: Input: sparse-keymap: add a description for @sw Add a description for it, in order to avoids this warning: ./include/linux/input/sparse-keymap.h:43: warning: Function parameter or member 'sw' not described in 'key_entry' Signed-off-by: Mauro Carvalho Chehab --- include/linux/input/sparse-keymap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/input/sparse-keymap.h b/include/linux/input/sparse-keymap.h index d25d1452dc6e..d0dddc14ebc8 100644 --- a/include/linux/input/sparse-keymap.h +++ b/include/linux/input/sparse-keymap.h @@ -20,6 +20,7 @@ * private definitions. * @code: Device-specific data identifying the button/switch * @keycode: KEY_* code assigned to a key/button + * @sw: struct with code/value used by KE_SW and KE_VSW * @sw.code: SW_* code assigned to a switch * @sw.value: Value that should be sent in an input even when KE_SW * switch is toggled. KE_VSW switches ignore this field and -- cgit v1.2.3 From 770c03e6dabacd5b9f57bba93c4311d32b618640 Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Thu, 8 Oct 2020 17:34:14 +0800 Subject: rtc: mt6397: Remove unused member dev Removing the struct member "dev" in mt6397 RTC driver because it's not initialized and the only usage is for one debugging message. Also fixed a typo in the error message. Signed-off-by: Fei Shao Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201008093414.1911699-1-fshao@chromium.org --- include/linux/mfd/mt6397/rtc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h index 66989a16221a..c3748b53bf7d 100644 --- a/include/linux/mfd/mt6397/rtc.h +++ b/include/linux/mfd/mt6397/rtc.h @@ -72,7 +72,6 @@ struct mtk_rtc_data { }; struct mt6397_rtc { - struct device *dev; struct rtc_device *rtc_dev; /* Protect register access from multiple tasks */ -- cgit v1.2.3 From 1b2c54d63cde7e8cf15aa6319aba168d81c7e364 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Oct 2020 16:38:47 -0700 Subject: vfs: move the remap range helpers to remap_range.c Complete the migration by moving the file remapping helper functions out of read_write.c and into remap_range.c. This reduces the clutter in the first file and (eventually) will make it so that we can compile out the second file if it isn't needed. Signed-off-by: Darrick J. Wong --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index eea754a8dd67..073da53b59b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3009,9 +3009,6 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); -extern int generic_remap_checks(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *count, unsigned int remap_flags); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); -- cgit v1.2.3 From 407e9c63ee571f44a2dfb0828fc30daa02abb6dc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 15 Oct 2020 09:21:17 -0700 Subject: vfs: move the generic write and copy checks out of mm The generic write check helpers also don't have much to do with the page cache, so move them to the vfs. Signed-off-by: Darrick J. Wong --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 073da53b59b0..8fb063ab7d50 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3012,9 +3012,6 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); -extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t *count, unsigned int flags); extern ssize_t generic_file_buffered_read(struct kiocb *iocb, struct iov_iter *to, ssize_t already_read); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); -- cgit v1.2.3 From ac80cd17a615e472e3dcff0a5446a58540e64e6d Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Wed, 14 Oct 2020 09:16:01 -0700 Subject: dma-buf: Clarify that dma-buf sg lists are page aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dma-buf API have been used under the assumption that the sg lists returned from dma_buf_map_attachment() are fully page aligned. Lots of stuff can break otherwise all over the place. Clarify this in the documentation and add a check when DMA API debug is enabled. Signed-off-by: Jianxin Xiong Reviewed-by: Christian König Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1602692161-107096-1-git-send-email-jianxin.xiong@intel.com --- include/linux/dma-buf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index cf77cc15f4ba..03875eaed51a 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -146,7 +146,8 @@ struct dma_buf_ops { * * A &sg_table scatter list of or the backing storage of the DMA buffer, * already mapped into the device address space of the &device attached - * with the provided &dma_buf_attachment. + * with the provided &dma_buf_attachment. The addresses and lengths in + * the scatter list are PAGE_SIZE aligned. * * On failure, returns a negative error value wrapped into a pointer. * May also return -EINTR when a signal was received while being -- cgit v1.2.3 From 3e2ac9798e13ad1f52d735ea2ea1d252cb140ae5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 5 Oct 2020 10:58:18 +0200 Subject: PM / devfreq: remove a duplicated kernel-doc markup The update_devfreq() is also documented at devfreq.c, which has a more complete note. So, drop the duplicated markup, in order to avoid this warning: .../Documentation/driver-api/device_link.rst: WARNING: Duplicate C declaration, also defined in 'driver-api/infrastructure'. Declaration is 'device_link_state'. (and to cause a problem with cross-references to it) Signed-off-by: Mauro Carvalho Chehab --- include/linux/devfreq.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 2f4a74efa6be..121a2430d7f7 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -228,12 +228,7 @@ int devfreq_resume_device(struct devfreq *devfreq); void devfreq_suspend(void); void devfreq_resume(void); -/** - * update_devfreq() - Reevaluate the device and configure frequency - * @devfreq: the devfreq device - * - * Note: devfreq->lock must be held - */ +/* update_devfreq() - Reevaluate the device and configure frequency */ int update_devfreq(struct devfreq *devfreq); /* Helper functions for devfreq user device driver with OPP. */ -- cgit v1.2.3 From bae314dd5d8dfdd90ee584003a0f8c06e1bf3ea2 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 15 Oct 2020 16:44:27 +0200 Subject: cpuidle: Remove pointless stub The cpuidle.h header is declaring a function with an empty stub for the cpuidle disabled case, but that function is only called by cpuidle governors which depend on cpuidle anyway. In other words, the function is only called when cpuidle is enabled, so there is no need for the stub. Remove the pointless stub. Signed-off-by: Daniel Lezcano [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index ed0da0e58e8b..bd605b5585cf 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -271,13 +271,8 @@ struct cpuidle_governor { void (*reflect) (struct cpuidle_device *dev, int index); }; -#ifdef CONFIG_CPU_IDLE extern int cpuidle_register_governor(struct cpuidle_governor *gov); extern s64 cpuidle_governor_latency_req(unsigned int cpu); -#else -static inline int cpuidle_register_governor(struct cpuidle_governor *gov) -{return 0;} -#endif #define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, \ idx, \ -- cgit v1.2.3 From f1e8d7560d3051b38f73a0cf6acc1b0bf5305ad9 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 13 Oct 2020 15:42:41 +0800 Subject: powercap/intel_rapl: enumerate Psys RAPL domain together with package RAPL domain On multi-package systems, the Psys MSR is only valid for CPUs on specific package (master package). The current code makes the assumption that package 0 is the master package, but this is not true on new platforms like SPR. Fix the problem by emuerating the Psys RAPL domain for every package, so CPUs in slave packages will read 0 for the Psys energy counter and only CPUs in master packages can get a valid reading and register the Psys RAPL domain. The sysfs I/F for the Psys RAPL domain is not changed. Signed-off-by: Zhang Rui [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/intel_rapl.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 3582176a1eca..50b8398ffd21 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -79,8 +79,10 @@ struct rapl_power_limit { struct rapl_package; +#define RAPL_DOMAIN_NAME_LENGTH 16 + struct rapl_domain { - const char *name; + char name[RAPL_DOMAIN_NAME_LENGTH]; enum rapl_domain_type id; u64 regs[RAPL_DOMAIN_REG_MAX]; struct powercap_zone power_zone; @@ -152,7 +154,4 @@ struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv); void rapl_remove_package(struct rapl_package *rp); -int rapl_add_platform_domain(struct rapl_if_priv *priv); -void rapl_remove_platform_domain(struct rapl_if_priv *priv); - #endif /* __INTEL_RAPL_H__ */ -- cgit v1.2.3 From d4f8138354b9ec290de0c7ba527a945c5549e32b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 13 Oct 2020 14:23:39 +0200 Subject: PM: domains: Add support for PM domain on/off notifiers for genpd A device may have specific HW constraints that must be obeyed to, before its corresponding PM domain (genpd) can be powered off - and vice verse at power on. These constraints can't be managed through the regular runtime PM based deployment for a device, because the access pattern for it, isn't always request based. In other words, using the runtime PM callbacks to deal with the constraints doesn't work for these cases. For these reasons, let's instead add a PM domain power on/off notification mechanism to genpd. To add/remove a notifier for a device, the device must already have been attached to the genpd, which also means that it needs to be a part of the PM domain topology. To add/remove a notifier, let's introduce two genpd specific functions: - dev_pm_genpd_add|remove_notifier() Note that, to further clarify when genpd power on/off notifiers may be used, one can compare with the existing CPU_CLUSTER_PM_ENTER|EXIT notifiers. In the long run, the genpd power on/off notifiers should be able to replace them, but that requires additional genpd based platform support for the current users. Signed-off-by: Ulf Hansson Tested-by: Lina Iyer Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 66f3c5d64d81..db039da0aba2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -68,6 +68,13 @@ enum gpd_status { GENPD_STATE_OFF, /* PM domain is off */ }; +enum genpd_notication { + GENPD_NOTIFY_PRE_OFF = 0, + GENPD_NOTIFY_OFF, + GENPD_NOTIFY_PRE_ON, + GENPD_NOTIFY_ON, +}; + struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); bool (*suspend_ok)(struct device *dev); @@ -112,6 +119,7 @@ struct generic_pm_domain { cpumask_var_t cpus; /* A cpumask of the attached CPUs */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); + struct raw_notifier_head power_notifiers; /* Power on/off notifiers */ struct opp_table *opp_table; /* OPP table of the genpd */ unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd, struct dev_pm_opp *opp); @@ -178,6 +186,7 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + struct notifier_block *power_nb; int cpu; unsigned int performance_state; void *data; @@ -204,6 +213,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); int pm_genpd_remove(struct generic_pm_domain *genpd); int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); +int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); +int dev_pm_genpd_remove_notifier(struct device *dev); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -251,6 +262,17 @@ static inline int dev_pm_genpd_set_performance_state(struct device *dev, return -ENOTSUPP; } +static inline int dev_pm_genpd_add_notifier(struct device *dev, + struct notifier_block *nb) +{ + return -ENOTSUPP; +} + +static inline int dev_pm_genpd_remove_notifier(struct device *dev) +{ + return -ENOTSUPP; +} + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif -- cgit v1.2.3 From c6a113b52302adcfadda63af81dc05f7a669fbc8 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Thu, 15 Oct 2020 14:47:22 -0600 Subject: PM: domains: enable domain idle state accounting To enable better debug of PM domains, keep a track of successful and failing attempts to enter each domain idle state. This statistics are exported in debugfs when reading the idle_states node associated with each PM domain. Signed-off-by: Lina Iyer [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index db039da0aba2..1ad0ec481416 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -89,6 +89,8 @@ struct genpd_power_state { s64 power_off_latency_ns; s64 power_on_latency_ns; s64 residency_ns; + u64 usage; + u64 rejected; struct fwnode_handle *fwnode; ktime_t idle_time; void *data; -- cgit v1.2.3 From ecdf57b4f6748f3cb89eaf2ffdc9cfae4829f493 Mon Sep 17 00:00:00 2001 From: "Saheed O. Bolarinwa" Date: Thu, 15 Oct 2020 14:30:34 -0500 Subject: PCI/ASPM: Remove struct aspm_register_info.l1ss_cap_ptr Save the L1 Substates Capability pointer in struct pci_dev. Then we don't have to keep track of it in the struct aspm_register_info and struct pcie_link_state, which makes the code easier to read. No functional change intended. [bhelgaas: split to a separate patch] Link: https://lore.kernel.org/r/20201015193039.12585-8-helgaas@kernel.org Signed-off-by: Saheed O. Bolarinwa Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 835530605c0d..c5288cd71a2e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -380,6 +380,7 @@ struct pci_dev { struct pcie_link_state *link_state; /* ASPM link state */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ + int l1ss; /* L1SS Capability pointer */ #endif unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ -- cgit v1.2.3 From 57417cebc96b57122a2207fc84a6077d20c84b4b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:05:13 -0700 Subject: XArray: add xa_get_order Patch series "Fix read-only THP for non-tmpfs filesystems". As described more verbosely in the [3/3] changelog, we can inadvertently put an order-0 page in the page cache which occupies 512 consecutive entries. Users are running into this if they enable the READ_ONLY_THP_FOR_FS config option; see https://bugzilla.kernel.org/show_bug.cgi?id=206569 and Qian Cai has also reported it here: https://lore.kernel.org/lkml/20200616013309.GB815@lca.pw/ This is a rather intrusive way of fixing the problem, but has the advantage that I've actually been testing it with the THP patches, which means that it sees far more use than it does upstream -- indeed, Song has been entirely unable to reproduce it. It also has the advantage that it removes a few patches from my gargantuan backlog of THP patches. This patch (of 3): This function returns the order of the entry at the index. We need this because there isn't space in the shadow entry to encode its order. [akpm@linux-foundation.org: export xa_get_order to modules] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: "Kirill A . Shutemov" Cc: Qian Cai Cc: Song Liu Link: https://lkml.kernel.org/r/20200903183029.14930-1-willy@infradead.org Link: https://lkml.kernel.org/r/20200903183029.14930-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/xarray.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index b4d70e7568b2..5b7f4ebcf4ff 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1505,6 +1505,15 @@ void xas_pause(struct xa_state *); void xas_create_range(struct xa_state *); +#ifdef CONFIG_XARRAY_MULTI +int xa_get_order(struct xarray *, unsigned long index); +#else +static inline int xa_get_order(struct xarray *xa, unsigned long index) +{ + return 0; +} +#endif + /** * xas_reload() - Refetch an entry from the xarray. * @xas: XArray operation state. -- cgit v1.2.3 From 8fc75643c5e14574c8be59b69182452ece28315a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:05:16 -0700 Subject: XArray: add xas_split In order to use multi-index entries for huge pages in the page cache, we need to be able to split a multi-index entry (eg if a file is truncated in the middle of a huge page entry). This version does not support splitting more than one level of the tree at a time. This is an acceptable limitation for the page cache as we do not expect to support order-12 pages in the near future. [akpm@linux-foundation.org: export xas_split_alloc() to modules] [willy@infradead.org: fix xarray split] Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org [willy@infradead.org: fix xarray] Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: "Kirill A . Shutemov" Cc: Qian Cai Cc: Song Liu Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/xarray.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5b7f4ebcf4ff..5cdf441f6377 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *); #ifdef CONFIG_XARRAY_MULTI int xa_get_order(struct xarray *, unsigned long index); +void xas_split(struct xa_state *, void *entry, unsigned int order); +void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { return 0; } + +static inline void xas_split(struct xa_state *xas, void *entry, + unsigned int order) +{ + xas_store(xas, entry); +} + +static inline void xas_split_alloc(struct xa_state *xas, void *entry, + unsigned int order, gfp_t gfp) +{ +} #endif /** -- cgit v1.2.3 From 8fb156c9ee2db94f7127c930c89917634a1a9f56 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:05:29 -0700 Subject: mm/page_owner: change split_page_owner to take a count The implementation of split_page_owner() prefers a count rather than the old order of the page. When we support a variable size THP, we won't have the order at this point, but we will have the number of pages. So change the interface to what the caller and callee would prefer. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: SeongJae Park Acked-by: Kirill A. Shutemov Cc: Huang Ying Link: https://lkml.kernel.org/r/20200908195539.25896-4-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 8679ccd722e8..3468794f83d2 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -11,7 +11,7 @@ extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); -extern void __split_page_owner(struct page *page, unsigned int order); +extern void __split_page_owner(struct page *page, unsigned int nr); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(struct page *page); @@ -31,10 +31,10 @@ static inline void set_page_owner(struct page *page, __set_page_owner(page, order, gfp_mask); } -static inline void split_page_owner(struct page *page, unsigned int order) +static inline void split_page_owner(struct page *page, unsigned int nr) { if (static_branch_unlikely(&page_owner_inited)) - __split_page_owner(page, order); + __split_page_owner(page, nr); } static inline void copy_page_owner(struct page *oldpage, struct page *newpage) { -- cgit v1.2.3 From 01c70267053d6718820ac0902d8823d5dd2a6adb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:06:00 -0700 Subject: fs: add a filesystem flag for THPs The page cache needs to know whether the filesystem supports THPs so that it doesn't send THPs to filesystems which can't handle them. Dave Chinner points out that getting from the page mapping to the filesystem type is too many steps (mapping->host->i_sb->s_type->fs_flags) so cache that information in the address space flags. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: "Matthew Wilcox (Oracle)" Cc: Hugh Dickins Cc: Song Liu Cc: Rik van Riel Cc: "Kirill A . Shutemov" Cc: Johannes Weiner Cc: Dave Chinner Cc: Christoph Hellwig Link: https://lkml.kernel.org/r/20200916032717.22917-1-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + include/linux/pagemap.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ae97d87a00d2..72369be23f91 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2209,6 +2209,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ +#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c3afd3242b54..820c970fd24a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -29,6 +29,7 @@ enum mapping_flags { AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, + AS_THP_SUPPORT = 6, /* THPs supported */ }; /** @@ -120,6 +121,11 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) m->gfp_mask = mask; } +static inline bool mapping_thp_support(struct address_space *mapping) +{ + return test_bit(AS_THP_SUPPORT, &mapping->flags); +} + void release_pages(struct page **pages, int nr); /* -- cgit v1.2.3 From 6f4d2f9770cf154f9867f466d7b1b463a39f05a7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:06:03 -0700 Subject: fs: do not update nr_thps for mappings which support THPs The nr_thps counter is to support THPs in the page cache when the filesystem doesn't understand THPs. Eventually it will be removed, but we should still support filesystems which do not understand THPs yet. Move the nr_thp manipulation functions to filemap.h since they're page-cache specific. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Alexander Viro Cc: "Matthew Wilcox (Oracle)" Cc: Hugh Dickins Cc: Song Liu Cc: Rik van Riel Cc: "Kirill A . Shutemov" Cc: Johannes Weiner Cc: Dave Chinner Cc: Christoph Hellwig Link: https://lkml.kernel.org/r/20200916032717.22917-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/fs.h | 27 --------------------------- include/linux/pagemap.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 72369be23f91..d1d166b46131 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2697,33 +2697,6 @@ static inline errseq_t file_sample_sb_err(struct file *file) return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); } -static inline int filemap_nr_thps(struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - return atomic_read(&mapping->nr_thps); -#else - return 0; -#endif -} - -static inline void filemap_nr_thps_inc(struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - atomic_inc(&mapping->nr_thps); -#else - WARN_ON_ONCE(1); -#endif -} - -static inline void filemap_nr_thps_dec(struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - atomic_dec(&mapping->nr_thps); -#else - WARN_ON_ONCE(1); -#endif -} - extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 820c970fd24a..a0024528a9ee 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -126,6 +126,35 @@ static inline bool mapping_thp_support(struct address_space *mapping) return test_bit(AS_THP_SUPPORT, &mapping->flags); } +static inline int filemap_nr_thps(struct address_space *mapping) +{ +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + return atomic_read(&mapping->nr_thps); +#else + return 0; +#endif +} + +static inline void filemap_nr_thps_inc(struct address_space *mapping) +{ +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + if (!mapping_thp_support(mapping)) + atomic_inc(&mapping->nr_thps); +#else + WARN_ON_ONCE(1); +#endif +} + +static inline void filemap_nr_thps_dec(struct address_space *mapping) +{ +#ifdef CONFIG_READ_ONLY_THP_FOR_FS + if (!mapping_thp_support(mapping)) + atomic_dec(&mapping->nr_thps); +#else + WARN_ON_ONCE(1); +#endif +} + void release_pages(struct page **pages, int nr); /* -- cgit v1.2.3 From 1aa83cfa5a20a6bbd39d2355a89c95152e4b37b4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:06:10 -0700 Subject: mm/readahead: add DEFINE_READAHEAD Patch series "Readahead patches for 5.9/5.10". These are infrastructure for both the THP patchset and for the fscache rewrite, For both pieces of infrastructure being build on top of this patchset, we want the ractl to be available higher in the call-stack. For David's work, he wants to add the 'critical page' to the ractl so that he knows which page NEEDS to be brought in from storage, and which ones are nice-to-have. We might want something similar in block storage too. It used to be simple -- the first page was the critical one, but then mmap added fault-around and so for that usecase, the middle page is the critical one. Anyway, I don't have any code to show that yet, we just know that the lowest point in the callchain where we have that information is do_sync_mmap_readahead() and so the ractl needs to start its life there. For THP, we havew the code that needs it. It's actually the apex patch to the series; the one which finally starts to allocate THPs and present them to consenting filesystems: http://git.infradead.org/users/willy/pagecache.git/commitdiff/798bcf30ab2eff278caad03a9edca74d2f8ae760 This patch (of 8): Allow for a more concise definition of a struct readahead_control. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Eric Biggers Cc: David Howells Link: https://lkml.kernel.org/r/20200903140844.14194-1-willy@infradead.org Link: https://lkml.kernel.org/r/20200903140844.14194-3-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a0024528a9ee..63c81b512e80 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -812,6 +812,13 @@ struct readahead_control { unsigned int _batch_count; }; +#define DEFINE_READAHEAD(rac, f, m, i) \ + struct readahead_control rac = { \ + .file = f, \ + .mapping = m, \ + ._index = i, \ + } + /** * readahead_page - Get the next page to read. * @rac: The current readahead request. -- cgit v1.2.3 From 73bb49da50cd460bb3ba31250ed2e7fbf2115edf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:06:14 -0700 Subject: mm/readahead: make page_cache_ra_unbounded take a readahead_control Define it in the callers instead of in page_cache_ra_unbounded(). Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: David Howells Cc: Eric Biggers Link: https://lkml.kernel.org/r/20200903140844.14194-4-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 63c81b512e80..37f209ccef0f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -768,9 +768,8 @@ void page_cache_sync_readahead(struct address_space *, struct file_ra_state *, void page_cache_async_readahead(struct address_space *, struct file_ra_state *, struct file *, struct page *, pgoff_t index, unsigned long req_count); -void page_cache_readahead_unbounded(struct address_space *, struct file *, - pgoff_t index, unsigned long nr_to_read, - unsigned long lookahead_count); +void page_cache_ra_unbounded(struct readahead_control *, + unsigned long nr_to_read, unsigned long lookahead_count); /* * Like add_to_page_cache_locked, but used to add newly allocated pages: -- cgit v1.2.3 From fefa7c478fdafe71c64b5ddf817ac0271aed1146 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:06:28 -0700 Subject: mm/readahead: add page_cache_sync_ra and page_cache_async_ra Reimplement page_cache_sync_readahead() and page_cache_async_readahead() as wrappers around versions of the function which take a readahead_control in preparation for making do_sync_mmap_readahead() pass down an RAC struct. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: David Howells Cc: Eric Biggers Link: https://lkml.kernel.org/r/20200903140844.14194-8-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 64 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 37f209ccef0f..c77b7c31b2e4 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -761,16 +761,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec); -#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) - -void page_cache_sync_readahead(struct address_space *, struct file_ra_state *, - struct file *, pgoff_t index, unsigned long req_count); -void page_cache_async_readahead(struct address_space *, struct file_ra_state *, - struct file *, struct page *, pgoff_t index, - unsigned long req_count); -void page_cache_ra_unbounded(struct readahead_control *, - unsigned long nr_to_read, unsigned long lookahead_count); - /* * Like add_to_page_cache_locked, but used to add newly allocated pages: * the page is new, so we can just run __SetPageLocked() against it. @@ -818,6 +808,60 @@ struct readahead_control { ._index = i, \ } +#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) + +void page_cache_ra_unbounded(struct readahead_control *, + unsigned long nr_to_read, unsigned long lookahead_count); +void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *, + unsigned long req_count); +void page_cache_async_ra(struct readahead_control *, struct file_ra_state *, + struct page *, unsigned long req_count); + +/** + * page_cache_sync_readahead - generic file readahead + * @mapping: address_space which holds the pagecache and I/O vectors + * @ra: file_ra_state which holds the readahead state + * @file: Used by the filesystem for authentication. + * @index: Index of first page to be read. + * @req_count: Total number of pages being read by the caller. + * + * page_cache_sync_readahead() should be called when a cache miss happened: + * it will submit the read. The readahead logic may decide to piggyback more + * pages onto the read request if access patterns suggest it will improve + * performance. + */ +static inline +void page_cache_sync_readahead(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, pgoff_t index, + unsigned long req_count) +{ + DEFINE_READAHEAD(ractl, file, mapping, index); + page_cache_sync_ra(&ractl, ra, req_count); +} + +/** + * page_cache_async_readahead - file readahead for marked pages + * @mapping: address_space which holds the pagecache and I/O vectors + * @ra: file_ra_state which holds the readahead state + * @file: Used by the filesystem for authentication. + * @page: The page at @index which triggered the readahead call. + * @index: Index of first page to be read. + * @req_count: Total number of pages being read by the caller. + * + * page_cache_async_readahead() should be called when a page is used which + * is marked as PageReadahead; this is a marker to suggest that the application + * has used up enough of the readahead window that we should start pulling in + * more pages. + */ +static inline +void page_cache_async_readahead(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + struct page *page, pgoff_t index, unsigned long req_count) +{ + DEFINE_READAHEAD(ractl, file, mapping, index); + page_cache_async_ra(&ractl, ra, page, req_count); +} + /** * readahead_page - Get the next page to read. * @rac: The current readahead request. -- cgit v1.2.3 From 7e27f22c9e40b66186e0675376f0495725ff1b0a Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 15 Oct 2020 20:06:50 -0700 Subject: mm,hwpoison: unexport get_hwpoison_page and make it static Since get_hwpoison_page is only used in memory-failure code now, let us un-export it and make it private to that code. Signed-off-by: Oscar Salvador Signed-off-by: Andrew Morton Acked-by: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: Aneesh Kumar K.V Cc: Aristeu Rozanski Cc: Dave Hansen Cc: David Hildenbrand Cc: Dmitry Yakunin Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Qian Cai Cc: Tony Luck Link: https://lkml.kernel.org/r/20200922135650.1634-5-osalvador@suse.de Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 620961e4f32b..1977c09afe7a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3025,7 +3025,6 @@ extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); -extern int get_hwpoison_page(struct page *page); #define put_hwpoison_page(page) put_page(page) extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; -- cgit v1.2.3 From dd6e2402fad966290f35dc687294fb6049714aac Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 15 Oct 2020 20:06:57 -0700 Subject: mm,hwpoison: kill put_hwpoison_page After commit 4e41a30c6d50 ("mm: hwpoison: adjust for new thp refcounting"), put_hwpoison_page got reduced to a put_page. Let us just use put_page instead. Signed-off-by: Oscar Salvador Signed-off-by: Andrew Morton Acked-by: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: Aneesh Kumar K.V Cc: Aristeu Rozanski Cc: Dave Hansen Cc: David Hildenbrand Cc: Dmitry Yakunin Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Qian Cai Cc: Tony Luck Link: https://lkml.kernel.org/r/20200922135650.1634-7-osalvador@suse.de Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1977c09afe7a..ab038a3521b4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3025,7 +3025,6 @@ extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); -#define put_hwpoison_page(page) put_page(page) extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); -- cgit v1.2.3 From 06be6ff3d2ec8be806b859fc054a1909b16d2473 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 15 Oct 2020 20:07:05 -0700 Subject: mm,hwpoison: rework soft offline for free pages When trying to soft-offline a free page, we need to first take it off the buddy allocator. Once we know is out of reach, we can safely flag it as poisoned. take_page_off_buddy will be used to take a page meant to be poisoned off the buddy allocator. take_page_off_buddy calls break_down_buddy_pages, which splits a higher-order page in case our page belongs to one. Once the page is under our control, we call page_handle_poison to set it as poisoned and grab a refcount on it. Signed-off-by: Oscar Salvador Signed-off-by: Andrew Morton Acked-by: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: Aneesh Kumar K.V Cc: Aristeu Rozanski Cc: Dave Hansen Cc: David Hildenbrand Cc: Dmitry Yakunin Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Qian Cai Cc: Tony Luck Link: https://lkml.kernel.org/r/20200922135650.1634-9-osalvador@suse.de Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 38ded408bd4c..a02b6d0221db 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -432,6 +432,7 @@ PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) extern bool set_hwpoison_free_buddy_page(struct page *page); +extern bool take_page_off_buddy(struct page *page); #else PAGEFLAG_FALSE(HWPoison) static inline bool set_hwpoison_free_buddy_page(struct page *page) -- cgit v1.2.3 From 79f5f8fab482dfff62948214468ac4ebbf0a016f Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 15 Oct 2020 20:07:09 -0700 Subject: mm,hwpoison: rework soft offline for in-use pages This patch changes the way we set and handle in-use poisoned pages. Until now, poisoned pages were released to the buddy allocator, trusting that the checks that take place at allocation time would act as a safe net and would skip that page. This has proved to be wrong, as we got some pfn walkers out there, like compaction, that all they care is the page to be in a buddy freelist. Although this might not be the only user, having poisoned pages in the buddy allocator seems a bad idea as we should only have free pages that are ready and meant to be used as such. Before explaining the taken approach, let us break down the kind of pages we can soft offline. - Anonymous THP (after the split, they end up being 4K pages) - Hugetlb - Order-0 pages (that can be either migrated or invalited) * Normal pages (order-0 and anon-THP) - If they are clean and unmapped page cache pages, we invalidate then by means of invalidate_inode_page(). - If they are mapped/dirty, we do the isolate-and-migrate dance. Either way, do not call put_page directly from those paths. Instead, we keep the page and send it to page_handle_poison to perform the right handling. page_handle_poison sets the HWPoison flag and does the last put_page. Down the chain, we placed a check for HWPoison page in free_pages_prepare, that just skips any poisoned page, so those pages do not end up in any pcplist/freelist. After that, we set the refcount on the page to 1 and we increment the poisoned pages counter. If we see that the check in free_pages_prepare creates trouble, we can always do what we do for free pages: - wait until the page hits buddy's freelists - take it off, and flag it The downside of the above approach is that we could race with an allocation, so by the time we want to take the page off the buddy, the page has been already allocated so we cannot soft offline it. But the user could always retry it. * Hugetlb pages - We isolate-and-migrate them After the migration has been successful, we call dissolve_free_huge_page, and we set HWPoison on the page if we succeed. Hugetlb has a slightly different handling though. While for non-hugetlb pages we cared about closing the race with an allocation, doing so for hugetlb pages requires quite some additional and intrusive code (we would need to hook in free_huge_page and some other places). So I decided to not make the code overly complicated and just fail normally if the page we allocated in the meantime. We can always build on top of this. As a bonus, because of the way we handle now in-use pages, we no longer need the put-as-isolation-migratetype dance, that was guarding for poisoned pages to end up in pcplists. Signed-off-by: Oscar Salvador Signed-off-by: Andrew Morton Acked-by: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: Aneesh Kumar K.V Cc: Aristeu Rozanski Cc: Dave Hansen Cc: David Hildenbrand Cc: Dmitry Yakunin Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Qian Cai Cc: Tony Luck Link: https://lkml.kernel.org/r/20200922135650.1634-10-osalvador@suse.de Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a02b6d0221db..4f6ba9379112 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -431,14 +431,9 @@ PAGEFLAG_FALSE(Uncached) PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) -extern bool set_hwpoison_free_buddy_page(struct page *page); extern bool take_page_off_buddy(struct page *page); #else PAGEFLAG_FALSE(HWPoison) -static inline bool set_hwpoison_free_buddy_page(struct page *page) -{ - return 0; -} #define __PG_HWPOISON 0 #endif -- cgit v1.2.3 From 5d1fd5dc877bc1c670e7b1c174aa659b76c07de1 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 15 Oct 2020 20:07:21 -0700 Subject: mm,hwpoison: introduce MF_MSG_UNSPLIT_THP memory_failure() is supposed to call action_result() when it handles a memory error event, but there's one missing case. So let's add it. I find that include/ras/ras_event.h has some other MF_MSG_* undefined, so this patch also adds them. Signed-off-by: Naoya Horiguchi Signed-off-by: Oscar Salvador Signed-off-by: Andrew Morton Cc: "Aneesh Kumar K.V" Cc: Aneesh Kumar K.V Cc: Aristeu Rozanski Cc: Dave Hansen Cc: David Hildenbrand Cc: Dmitry Yakunin Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Qian Cai Cc: Tony Luck Link: https://lkml.kernel.org/r/20200922135650.1634-13-osalvador@suse.de Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ab038a3521b4..a9df46309e07 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3064,6 +3064,7 @@ enum mf_action_page_type { MF_MSG_BUDDY, MF_MSG_BUDDY_2ND, MF_MSG_DAX, + MF_MSG_UNSPLIT_THP, MF_MSG_UNKNOWN, }; -- cgit v1.2.3 From 257bea71582d895894201b604990a900df489103 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:07:59 -0700 Subject: mm/page_alloc: simplify __offline_isolated_pages() offline_pages() is the only user. __offline_isolated_pages() never gets called with ranges that contain memory holes and we no longer care about the return value. Drop the return value handling and all pfn_valid() checks. Update the documentation. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Wei Yang Cc: Baoquan He Cc: Pankaj Gupta Cc: Charan Teja Reddy Cc: Dan Williams Cc: Fenghua Yu Cc: Logan Gunthorpe Cc: "Matthew Wilcox (Oracle)" Cc: Mel Gorman Cc: Mel Gorman Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Tony Luck Link: https://lkml.kernel.org/r/20200819175957.28465-5-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c0faa7a30c46..76b314031f09 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -103,8 +103,8 @@ extern int online_pages(unsigned long pfn, unsigned long nr_pages, int online_type, int nid); extern struct zone *test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long __offline_isolated_pages(unsigned long start_pfn, - unsigned long end_pfn); +extern void __offline_isolated_pages(unsigned long start_pfn, + unsigned long end_pfn); typedef void (*online_page_callback_t)(struct page *page, unsigned int order); -- cgit v1.2.3 From d882c0067d99d0f2add9a41628703cc99511a639 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:19 -0700 Subject: mm: pass migratetype into memmap_init_zone() and move_pfn_range_to_zone() On the memory onlining path, we want to start with MIGRATE_ISOLATE, to un-isolate the pages after memory onlining is complete. Let's allow passing in the migratetype. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Wei Yang Cc: Baoquan He Cc: Pankaj Gupta Cc: Tony Luck Cc: Fenghua Yu Cc: Logan Gunthorpe Cc: Dan Williams Cc: Mike Rapoport Cc: "Matthew Wilcox (Oracle)" Cc: Michel Lespinasse Cc: Charan Teja Reddy Cc: Mel Gorman Link: https://lkml.kernel.org/r/20200819175957.28465-10-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 ++- include/linux/mm.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 76b314031f09..51a877fec8da 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -351,7 +351,8 @@ extern int add_memory_resource(int nid, struct resource *resource); extern int add_memory_driver_managed(int nid, u64 start, u64 size, const char *resource_name); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap); + unsigned long nr_pages, + struct vmem_altmap *altmap, int migratetype); extern void remove_pfn_range_from_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); diff --git a/include/linux/mm.h b/include/linux/mm.h index a9df46309e07..61a2633fcc7f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2440,7 +2440,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn, extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, - enum meminit_context, struct vmem_altmap *); + enum meminit_context, struct vmem_altmap *, int migratetype); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); -- cgit v1.2.3 From ec62d04e3fdc4ba3a7912cd7f6da1a4e787a0d75 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:28 -0700 Subject: kernel/resource: make release_mem_region_adjustable() never fail Patch series "selective merging of system ram resources", v4. Some add_memory*() users add memory in small, contiguous memory blocks. Examples include virtio-mem, hyper-v balloon, and the XEN balloon. This can quickly result in a lot of memory resources, whereby the actual resource boundaries are not of interest (e.g., it might be relevant for DIMMs, exposed via /proc/iomem to user space). We really want to merge added resources in this scenario where possible. Resources are effectively stored in a list-based tree. Having a lot of resources not only wastes memory, it also makes traversing that tree more expensive, and makes /proc/iomem explode in size (e.g., requiring kexec-tools to manually merge resources when creating a kdump header. The current kexec-tools resource count limit does not allow for more than ~100GB of memory with a memory block size of 128MB on x86-64). Let's allow to selectively merge system ram resources by specifying a new flag for add_memory*(). Patch #5 contains a /proc/iomem example. Only tested with virtio-mem. This patch (of 8): Let's make sure splitting a resource on memory hotunplug will never fail. This will become more relevant once we merge selected System RAM resources - then, we'll trigger that case more often on memory hotunplug. In general, this function is already unlikely to fail. When we remove memory, we free up quite a lot of metadata (memmap, page tables, memory block device, etc.). The only reason it could really fail would be when injecting allocation errors. All other error cases inside release_mem_region_adjustable() seem to be sanity checks if the function would be abused in different context - let's add WARN_ON_ONCE() in these cases so we can catch them. [natechancellor@gmail.com: fix use of ternary condition in release_mem_region_adjustable] Link: https://lkml.kernel.org/r/20200922060748.2452056-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/1159 Signed-off-by: David Hildenbrand Signed-off-by: Nathan Chancellor Signed-off-by: Andrew Morton Cc: Michal Hocko Cc: Dan Williams Cc: Jason Gunthorpe Cc: Kees Cook Cc: Ard Biesheuvel Cc: Pankaj Gupta Cc: Baoquan He Cc: Wei Yang Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: Christian Borntraeger Cc: Dave Jiang Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jason Wang Cc: Juergen Gross Cc: Julien Grall Cc: "K. Y. Srinivasan" Cc: Len Brown Cc: Leonardo Bras Cc: Libor Pechacek Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Nathan Lynch Cc: "Oliver O'Halloran" Cc: Paul Mackerras Cc: Pingfan Liu Cc: "Rafael J. Wysocki" Cc: Roger Pau Monn Cc: Stefano Stabellini Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Cc: Wei Liu Link: https://lkml.kernel.org/r/20200911103459.10306-2-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6c2b06fe8beb..52a91f5fa1a3 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -248,8 +248,8 @@ extern struct resource * __request_region(struct resource *, extern void __release_region(struct resource *, resource_size_t, resource_size_t); #ifdef CONFIG_MEMORY_HOTREMOVE -extern int release_mem_region_adjustable(struct resource *, resource_size_t, - resource_size_t); +extern void release_mem_region_adjustable(struct resource *, resource_size_t, + resource_size_t); #endif /* Wrappers for managed devices */ -- cgit v1.2.3 From 7cf603d17d9bddbda90c424b6f30c7bc2e6f48f2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:33 -0700 Subject: kernel/resource: move and rename IORESOURCE_MEM_DRIVER_MANAGED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IORESOURCE_MEM_DRIVER_MANAGED currently uses an unused PnP bit, which is always set to 0 by hardware. This is far from beautiful (and confusing), and the bit only applies to SYSRAM. So let's move it out of the bus-specific (PnP) defined bits. We'll add another SYSRAM specific bit soon. If we ever need more bits for other purposes, we can steal some from "desc", or reshuffle/regroup what we have. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Cc: Michal Hocko Cc: Dan Williams Cc: Jason Gunthorpe Cc: Kees Cook Cc: Ard Biesheuvel Cc: Pankaj Gupta Cc: Baoquan He Cc: Wei Yang Cc: Eric Biederman Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: Christian Borntraeger Cc: Dave Jiang Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jason Wang Cc: Juergen Gross Cc: Julien Grall Cc: "K. Y. Srinivasan" Cc: Len Brown Cc: Leonardo Bras Cc: Libor Pechacek Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Nathan Lynch Cc: "Oliver O'Halloran" Cc: Paul Mackerras Cc: Pingfan Liu Cc: "Rafael J. Wysocki" Cc: Roger Pau Monné Cc: Stefano Stabellini Cc: Stephen Hemminger Cc: Vasily Gorbik Cc: Vishal Verma Cc: Wei Liu Link: https://lkml.kernel.org/r/20200911103459.10306-3-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 52a91f5fa1a3..d7620d7c941a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -58,6 +58,9 @@ struct resource { #define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */ #define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */ +/* IORESOURCE_SYSRAM specific bits. */ +#define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */ + #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 @@ -103,7 +106,6 @@ struct resource { #define IORESOURCE_MEM_32BIT (3<<3) #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) -#define IORESOURCE_MEM_DRIVER_MANAGED (1<<7) /* PnP I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IO_16BIT_ADDR (1<<0) -- cgit v1.2.3 From 3a0aaefe4134951b4e89feb873c457428154530c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:39 -0700 Subject: mm/memory_hotplug: guard more declarations by CONFIG_MEMORY_HOTPLUG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We soon want to pass flags via a new type to add_memory() and friends. That revealed that we currently don't guard some declarations by CONFIG_MEMORY_HOTPLUG. While some definitions could be moved to different places, let's keep it minimal for now and use CONFIG_MEMORY_HOTPLUG for all functions only compiled with CONFIG_MEMORY_HOTPLUG. Wrap sparse_decode_mem_map() into CONFIG_MEMORY_HOTPLUG, it's only called from CONFIG_MEMORY_HOTPLUG code. While at it, remove allow_online_pfn_range(), which is no longer around, and mhp_notimplemented(), which is unused. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Cc: Michal Hocko Cc: Dan Williams Cc: Pankaj Gupta Cc: Baoquan He Cc: Wei Yang Cc: Anton Blanchard Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: Christian Borntraeger Cc: Dave Jiang Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jason Gunthorpe Cc: Jason Wang Cc: Juergen Gross Cc: Julien Grall Cc: Kees Cook Cc: "K. Y. Srinivasan" Cc: Len Brown Cc: Leonardo Bras Cc: Libor Pechacek Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Nathan Lynch Cc: "Oliver O'Halloran" Cc: Paul Mackerras Cc: Pingfan Liu Cc: "Rafael J. Wysocki" Cc: Roger Pau Monné Cc: Stefano Stabellini Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Cc: Wei Liu Link: https://lkml.kernel.org/r/20200911103459.10306-4-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 51a877fec8da..1504b4d5ae6c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -247,13 +247,6 @@ static inline void zone_span_writelock(struct zone *zone) {} static inline void zone_span_writeunlock(struct zone *zone) {} static inline void zone_seqlock_init(struct zone *zone) {} -static inline int mhp_notimplemented(const char *func) -{ - printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func); - dump_stack(); - return -ENOSYS; -} - static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { } @@ -344,6 +337,7 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {} extern void set_zone_contiguous(struct zone *zone); extern void clear_zone_contiguous(struct zone *zone); +#ifdef CONFIG_MEMORY_HOTPLUG extern void __ref free_area_init_core_hotplug(int nid); extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); @@ -364,8 +358,8 @@ extern void sparse_remove_section(struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); -extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, - int online_type); extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, unsigned long nr_pages); +#endif /* CONFIG_MEMORY_HOTPLUG */ + #endif /* __LINUX_MEMORY_HOTPLUG_H */ -- cgit v1.2.3 From b6117199787c60539105d2de0d010146e8396fc3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:44 -0700 Subject: mm/memory_hotplug: prepare passing flags to add_memory() and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We soon want to pass flags, e.g., to mark added System RAM resources. mergeable. Prepare for that. This patch is based on a similar patch by Oscar Salvador: https://lkml.kernel.org/r/20190625075227.15193-3-osalvador@suse.de Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Juergen Gross # Xen related part Reviewed-by: Pankaj Gupta Acked-by: Wei Liu Cc: Michal Hocko Cc: Dan Williams Cc: Jason Gunthorpe Cc: Baoquan He Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Greg Kroah-Hartman Cc: Vishal Verma Cc: Dave Jiang Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: David Hildenbrand Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Boris Ostrovsky Cc: Stefano Stabellini Cc: "Oliver O'Halloran" Cc: Pingfan Liu Cc: Nathan Lynch Cc: Libor Pechacek Cc: Anton Blanchard Cc: Leonardo Bras Cc: Ard Biesheuvel Cc: Eric Biederman Cc: Julien Grall Cc: Kees Cook Cc: Roger Pau Monné Cc: Thomas Gleixner Cc: Wei Yang Link: https://lkml.kernel.org/r/20200911103459.10306-5-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 1504b4d5ae6c..33eb80fdba22 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -57,6 +57,12 @@ enum { MMOP_ONLINE_MOVABLE, }; +/* Flags for add_memory() and friends to specify memory hotplug details. */ +typedef int __bitwise mhp_t; + +/* No special request */ +#define MHP_NONE ((__force mhp_t)0) + /* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) @@ -339,11 +345,13 @@ extern void clear_zone_contiguous(struct zone *zone); #ifdef CONFIG_MEMORY_HOTPLUG extern void __ref free_area_init_core_hotplug(int nid); -extern int __add_memory(int nid, u64 start, u64 size); -extern int add_memory(int nid, u64 start, u64 size); -extern int add_memory_resource(int nid, struct resource *resource); +extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); +extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); +extern int add_memory_resource(int nid, struct resource *resource, + mhp_t mhp_flags); extern int add_memory_driver_managed(int nid, u64 start, u64 size, - const char *resource_name); + const char *resource_name, + mhp_t mhp_flags); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap, int migratetype); -- cgit v1.2.3 From 9ca6551ee24368a4d2b09566ea4d10fe87860379 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:08:49 -0700 Subject: mm/memory_hotplug: MEMHP_MERGE_RESOURCE to specify merging of System RAM resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some add_memory*() users add memory in small, contiguous memory blocks. Examples include virtio-mem, hyper-v balloon, and the XEN balloon. This can quickly result in a lot of memory resources, whereby the actual resource boundaries are not of interest (e.g., it might be relevant for DIMMs, exposed via /proc/iomem to user space). We really want to merge added resources in this scenario where possible. Let's provide a flag (MEMHP_MERGE_RESOURCE) to specify that a resource either created within add_memory*() or passed via add_memory_resource() shall be marked mergeable and merged with applicable siblings. To implement that, we need a kernel/resource interface to mark selected System RAM resources mergeable (IORESOURCE_SYSRAM_MERGEABLE) and trigger merging. Note: We really want to merge after the whole operation succeeded, not directly when adding a resource to the resource tree (it would break add_memory_resource() and require splitting resources again when the operation failed - e.g., due to -ENOMEM). Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Pankaj Gupta Cc: Michal Hocko Cc: Dan Williams Cc: Jason Gunthorpe Cc: Kees Cook Cc: Ard Biesheuvel Cc: Thomas Gleixner Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Stefano Stabellini Cc: Roger Pau Monné Cc: Julien Grall Cc: Baoquan He Cc: Wei Yang Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Dave Jiang Cc: Eric Biederman Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Jason Wang Cc: Len Brown Cc: Leonardo Bras Cc: Libor Pechacek Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Nathan Lynch Cc: "Oliver O'Halloran" Cc: Paul Mackerras Cc: Pingfan Liu Cc: "Rafael J. Wysocki" Cc: Vasily Gorbik Cc: Vishal Verma Link: https://lkml.kernel.org/r/20200911103459.10306-6-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 4 ++++ include/linux/memory_hotplug.h | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index d7620d7c941a..7e61389dcb01 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -60,6 +60,7 @@ struct resource { /* IORESOURCE_SYSRAM specific bits. */ #define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */ +#define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */ #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ @@ -253,6 +254,9 @@ extern void __release_region(struct resource *, resource_size_t, extern void release_mem_region_adjustable(struct resource *, resource_size_t, resource_size_t); #endif +#ifdef CONFIG_MEMORY_HOTPLUG +extern void merge_system_ram_resource(struct resource *res); +#endif /* Wrappers for managed devices */ struct device; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 33eb80fdba22..d65c6fdc5cfc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -62,6 +62,13 @@ typedef int __bitwise mhp_t; /* No special request */ #define MHP_NONE ((__force mhp_t)0) +/* + * Allow merging of the added System RAM resource with adjacent, + * mergeable resources. After a successful call to add_memory_resource() + * with this flag set, the resource pointer must no longer be used as it + * might be stale, or the resource might have changed. + */ +#define MEMHP_MERGE_RESOURCE ((__force mhp_t)BIT(0)) /* * Extended parameters for memory hotplug: -- cgit v1.2.3 From cb8e3c8b4f45e4ed8987a581956dc9c3827a5bcf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 15 Oct 2020 20:09:12 -0700 Subject: kernel/resource: make iomem_resource implicit in release_mem_region_adjustable() "mem" in the name already indicates the root, similar to release_mem_region() and devm_request_mem_region(). Make it implicit. The only single caller always passes iomem_resource, other parents are not applicable. Suggested-by: Wei Yang Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Wei Yang Cc: Michal Hocko Cc: Dan Williams Cc: Jason Gunthorpe Cc: Kees Cook Cc: Ard Biesheuvel Cc: Pankaj Gupta Cc: Baoquan He Link: https://lkml.kernel.org/r/20200916073041.10355-1-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 7e61389dcb01..5135d4b86cd6 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -251,8 +251,7 @@ extern struct resource * __request_region(struct resource *, extern void __release_region(struct resource *, resource_size_t, resource_size_t); #ifdef CONFIG_MEMORY_HOTREMOVE -extern void release_mem_region_adjustable(struct resource *, resource_size_t, - resource_size_t); +extern void release_mem_region_adjustable(resource_size_t, resource_size_t); #endif #ifdef CONFIG_MEMORY_HOTPLUG extern void merge_system_ram_resource(struct resource *res); -- cgit v1.2.3 From 90c7eaeb14a325a760d732184ff1fbed47e5fa98 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 15 Oct 2020 20:09:15 -0700 Subject: mm: don't panic when links can't be created in sysfs At boot time, or when doing memory hot-add operations, if the links in sysfs can't be created, the system is still able to run, so just report the error in the kernel log rather than BUG_ON and potentially make system unusable because the callpath can be called with locks held. Since the number of memory blocks managed could be high, the messages are rate limited. As a consequence, link_mem_sections() has no status to report anymore. Signed-off-by: Laurent Dufour Signed-off-by: Andrew Morton Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Acked-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Fenghua Yu Cc: Nathan Lynch Cc: "Rafael J . Wysocki" Cc: Scott Cheloha Cc: Tony Luck Link: https://lkml.kernel.org/r/20200915094143.79181-4-ldufour@linux.ibm.com Signed-off-by: Linus Torvalds --- include/linux/node.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/node.h b/include/linux/node.h index 014ba3ab2efd..8e5a29897936 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -99,15 +99,14 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) -int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context); +void link_mem_sections(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context); #else -static inline int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn, - enum meminit_context context) +static inline void link_mem_sections(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context) { - return 0; } #endif @@ -130,8 +129,7 @@ static inline int register_one_node(int nid) if (error) return error; /* link memory sections under this node */ - error = link_mem_sections(nid, start_pfn, end_pfn, - MEMINIT_EARLY); + link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY); } return error; -- cgit v1.2.3 From ed0173733dd468883198c3136284394320b8fad6 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 15 Oct 2020 20:09:55 -0700 Subject: mm: use self-explanatory macros rather than "2" Signed-off-by: Yu Zhao Signed-off-by: Andrew Morton Cc: Alex Shi Link: http://lkml.kernel.org/r/20200831175042.3527153-2-yuzhao@google.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 12 ++++++++---- include/linux/vmstat.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c27fb1faffe5..7e0ea3fe95ca 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -266,6 +266,8 @@ static inline bool is_active_lru(enum lru_list lru) return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } +#define ANON_AND_FILE 2 + enum lruvec_flags { LRUVEC_CONGESTED, /* lruvec has many dirty pages * backed by a congested BDI @@ -283,8 +285,8 @@ struct lruvec { unsigned long file_cost; /* Non-resident age, driven by LRU movement */ atomic_long_t nonresident_age; - /* Refaults at the time of last reclaim cycle, anon=0, file=1 */ - unsigned long refaults[2]; + /* Refaults at the time of last reclaim cycle */ + unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; #ifdef CONFIG_MEMCG @@ -441,6 +443,8 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H +#define ASYNC_AND_SYNC 2 + struct zone { /* Read-mostly fields */ @@ -560,8 +564,8 @@ struct zone { #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* pfn where compaction free scanner should start */ unsigned long compact_cached_free_pfn; - /* pfn where async and sync compaction migration scanner should start */ - unsigned long compact_cached_migrate_pfn[2]; + /* pfn where compaction migration scanner should start */ + unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC]; unsigned long compact_init_migrate_pfn; unsigned long compact_init_free_pfn; #endif diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 7557c1070fd7..322dcbfcc933 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -28,7 +28,7 @@ struct reclaim_stat { unsigned nr_writeback; unsigned nr_immediate; unsigned nr_pageout; - unsigned nr_activate[2]; + unsigned nr_activate[ANON_AND_FILE]; unsigned nr_ref_keep; unsigned nr_unmap_fail; unsigned nr_lazyfree_fail; -- cgit v1.2.3 From 1f0f8c0de09066d23760c1f5fac2cd53b32f1127 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 15 Oct 2020 20:10:11 -0700 Subject: include/linux/mmzone.h: remove unused early_pfn_valid() The early_pfn_valid() macro is defined but it is never used. Remove it. Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Acked-by: David Hildenbrand Link: https://lkml.kernel.org/r/20200923162915.26935-1-rppt@kernel.org Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7e0ea3fe95ca..fb3bf696c05e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1420,7 +1420,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr) #define pfn_to_nid(pfn) (0) #endif -#define early_pfn_valid(pfn) pfn_valid(pfn) void sparse_init(void); #else #define sparse_init() do {} while (0) @@ -1440,10 +1439,6 @@ struct mminit_pfnnid_cache { int last_nid; }; -#ifndef early_pfn_valid -#define early_pfn_valid(pfn) (1) -#endif - /* * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we * need to check pfn validity within that MAX_ORDER_NR_PAGES block. -- cgit v1.2.3 From b296a6d53339a79082c1d2c1761e948e8b3def69 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Oct 2020 20:10:21 -0700 Subject: kernel.h: split out min()/max() et al. helpers kernel.h is being used as a dump for all kinds of stuff for a long time. Here is the attempt to start cleaning it up by splitting out min()/max() et al. helpers. At the same time convert users in header and lib folder to use new header. Though for time being include new header back to kernel.h to avoid twisted indirected includes for other existing users. Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Rasmus Villemoes Cc: Joe Perches Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20200910164152.GA1891694@smile.fi.intel.com Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 1 + include/linux/bvec.h | 6 +- include/linux/jiffies.h | 3 +- include/linux/kernel.h | 150 +--------------------------------------------- include/linux/minmax.h | 153 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nodemask.h | 2 +- include/linux/uaccess.h | 1 + 7 files changed, 164 insertions(+), 152 deletions(-) create mode 100644 include/linux/minmax.h (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c09375e0a0eb..639cae2c158b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/bvec.h b/include/linux/bvec.h index dd74503f7e5e..2efec10bf792 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -7,10 +7,14 @@ #ifndef __LINUX_BVEC_ITER_H #define __LINUX_BVEC_ITER_H -#include #include #include +#include +#include #include +#include + +struct page; /** * struct bio_vec - a contiguous range of physical memory addresses diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fed6ba96c527..5e13f801c902 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -3,8 +3,9 @@ #define _LINUX_JIFFIES_H #include +#include #include -#include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e4aa29b1ad62..c629215fdad9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -833,155 +834,6 @@ ftrace_vprintk(const char *fmt, va_list ap) static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ -/* - * min()/max()/clamp() macros must accomplish three things: - * - * - avoid multiple evaluations of the arguments (so side-effects like - * "x++" happen only once) when non-constant. - * - perform strict type-checking (to generate warnings instead of - * nasty runtime surprises). See the "unnecessary" pointer comparison - * in __typecheck(). - * - retain result as a constant expressions when called with only - * constant expressions (to avoid tripping VLA warnings in stack - * allocation usage). - */ -#define __typecheck(x, y) \ - (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) - -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - -#define __no_side_effects(x, y) \ - (__is_constexpr(x) && __is_constexpr(y)) - -#define __safe_cmp(x, y) \ - (__typecheck(x, y) && __no_side_effects(x, y)) - -#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) - -#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - __cmp(unique_x, unique_y, op); }) - -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), \ - __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) - -/** - * min - return minimum of two values of the same or compatible types - * @x: first value - * @y: second value - */ -#define min(x, y) __careful_cmp(x, y, <) - -/** - * max - return maximum of two values of the same or compatible types - * @x: first value - * @y: second value - */ -#define max(x, y) __careful_cmp(x, y, >) - -/** - * min3 - return minimum of three values - * @x: first value - * @y: second value - * @z: third value - */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) - -/** - * max3 - return maximum of three values - * @x: first value - * @y: second value - * @z: third value - */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) - -/** - * min_not_zero - return the minimum that is _not_ zero, unless both are zero - * @x: value1 - * @y: value2 - */ -#define min_not_zero(x, y) ({ \ - typeof(x) __x = (x); \ - typeof(y) __y = (y); \ - __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) - -/** - * clamp - return a value clamped to a given range with strict typechecking - * @val: current value - * @lo: lowest allowable value - * @hi: highest allowable value - * - * This macro does strict typechecking of @lo/@hi to make sure they are of the - * same type as @val. See the unnecessary pointer comparisons. - */ -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) - -/* - * ..and if you can't take the strict - * types, you can specify one yourself. - * - * Or not use min/max/clamp at all, of course. - */ - -/** - * min_t - return minimum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value - */ -#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) - -/** - * max_t - return maximum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value - */ -#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) - -/** - * clamp_t - return a value clamped to a given range using a given type - * @type: the type of variable to use - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of type - * @type to make all the comparisons. - */ -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) - -/** - * clamp_val - return a value clamped to a given range using val's type - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of whatever - * type the input argument @val is. This is useful when @val is an unsigned - * type and @lo and @hi are literals that will otherwise be assigned a signed - * integer type. - */ -#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) - - -/** - * swap - swap values of @a and @b - * @a: first value - * @b: second value - */ -#define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) - /* This counts to 12. Any more, it will return 13th argument. */ #define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) diff --git a/include/linux/minmax.h b/include/linux/minmax.h new file mode 100644 index 000000000000..c0f57b0c64d9 --- /dev/null +++ b/include/linux/minmax.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MINMAX_H +#define _LINUX_MINMAX_H + +/* + * min()/max()/clamp() macros must accomplish three things: + * + * - avoid multiple evaluations of the arguments (so side-effects like + * "x++" happen only once) when non-constant. + * - perform strict type-checking (to generate warnings instead of + * nasty runtime surprises). See the "unnecessary" pointer comparison + * in __typecheck(). + * - retain result as a constant expressions when called with only + * constant expressions (to avoid tripping VLA warnings in stack + * allocation usage). + */ +#define __typecheck(x, y) \ + (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +#define __no_side_effects(x, y) \ + (__is_constexpr(x) && __is_constexpr(y)) + +#define __safe_cmp(x, y) \ + (__typecheck(x, y) && __no_side_effects(x, y)) + +#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) + +#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + __cmp(unique_x, unique_y, op); }) + +#define __careful_cmp(x, y, op) \ + __builtin_choose_expr(__safe_cmp(x, y), \ + __cmp(x, y, op), \ + __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define min(x, y) __careful_cmp(x, y, <) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define max(x, y) __careful_cmp(x, y, >) + +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define max3(x, y, z) max((typeof(x))max(x, y), z) + +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + +/** + * clamp - return a value clamped to a given range with strict typechecking + * @val: current value + * @lo: lowest allowable value + * @hi: highest allowable value + * + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. + */ +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + +/* + * ..and if you can't take the strict + * types, you can specify one yourself. + * + * Or not use min/max/clamp at all, of course. + */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) + +/** + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * @type to make all the comparisons. + */ +#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) + +/** + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed + * integer type. + */ +#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) + +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 3334ce056335..ac398e143c9a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -90,9 +90,9 @@ * for such situations. See below and CPUMASK_ALLOC also. */ -#include #include #include +#include #include typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 1ae36bc8db35..ef084eacaa7c 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -3,6 +3,7 @@ #define __LINUX_UACCESS_H__ #include +#include #include #include -- cgit v1.2.3 From 3b6742618ed9216dd6caad968fe8c83b32dff485 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 15 Oct 2020 20:11:17 -0700 Subject: lib/idr.c: document calling context for IDA APIs mustn't use locks The documentation for these functions indicates that callers don't need to hold a lock while calling them, but that documentation is only in one place under "IDA Usage". Let's state the same information on each IDA function so that it's clear what the calling context requires. Furthermore, let's document ida_simple_get() with the same information so that callers know how this API works. Signed-off-by: Stephen Boyd Signed-off-by: Andrew Morton Reviewed-by: Greg Kroah-Hartman Cc: Tri Vo Cc: Jonathan Corbet Cc: Matthew Wilcox Link: https://lkml.kernel.org/r/20200910055246.2297797-1-swboyd@chromium.org Signed-off-by: Linus Torvalds --- include/linux/idr.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 3ade03e5c7af..b235ed987021 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -263,7 +263,8 @@ void ida_destroy(struct ida *ida); * * Allocate an ID between 0 and %INT_MAX, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -280,7 +281,8 @@ static inline int ida_alloc(struct ida *ida, gfp_t gfp) * * Allocate an ID between @min and %INT_MAX, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -297,7 +299,8 @@ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) * * Allocate an ID between 0 and @max, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ -- cgit v1.2.3 From 3264ceec8f17a99a3895de7de06b4d7e9c8f3f30 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 15 Oct 2020 20:11:21 -0700 Subject: lib/idr.c: document that ida_simple_{get,remove}() are deprecated These two functions are deprecated. Users should call ida_alloc() or ida_free() respectively instead. Add documentation to this effect until the macro can be removed. Signed-off-by: Stephen Boyd Signed-off-by: Andrew Morton Reviewed-by: Tri Vo Cc: Greg KH Cc: Jonathan Corbet Cc: Matthew Wilcox Link: https://lkml.kernel.org/r/20200910055246.2297797-2-swboyd@chromium.org Signed-off-by: Linus Torvalds --- include/linux/idr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index b235ed987021..a0dce14090a9 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -314,6 +314,10 @@ static inline void ida_init(struct ida *ida) xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } +/* + * ida_simple_get() and ida_simple_remove() are deprecated. Use + * ida_alloc() and ida_free() instead respectively. + */ #define ida_simple_get(ida, start, end, gfp) \ ida_alloc_range(ida, start, (end) - 1, gfp) #define ida_simple_remove(ida, id) ida_free(ida, id) -- cgit v1.2.3 From e130816164e244b692921de49771eeb28205152d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Oct 2020 20:11:31 -0700 Subject: include/linux/list.h: add a macro to test if entry is pointing to the head Add a macro to test if entry is pointing to the head of the list which is useful in cases like: list_for_each_entry(pos, &head, member) { if (cond) break; } if (list_entry_is_head(pos, &head, member)) return -ERRNO; that allows to avoid additional variable to be added to track if loop has not been stopped in the middle. While here, convert list_for_each_entry*() family of macros to use a new one. Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Reviewed-by: Cezary Rojewski Link: https://lkml.kernel.org/r/20200929134342.51489-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Torvalds --- include/linux/list.h | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 0d0d17a10d25..a18c87b63376 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -609,6 +609,15 @@ static inline void list_splice_tail_init(struct list_head *list, pos != (head); \ pos = n, n = pos->prev) +/** + * list_entry_is_head - test if the entry points to the head of the list + * @pos: the type * to cursor + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_entry_is_head(pos, head, member) \ + (&pos->member == (head)) + /** * list_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop cursor. @@ -617,7 +626,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry(pos, head, member) \ for (pos = list_first_entry(head, typeof(*pos), member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** @@ -628,7 +637,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_reverse(pos, head, member) \ for (pos = list_last_entry(head, typeof(*pos), member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** @@ -653,7 +662,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue(pos, head, member) \ for (pos = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** @@ -667,7 +676,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue_reverse(pos, head, member) \ for (pos = list_prev_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** @@ -679,7 +688,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate over list of given type, continuing from current position. */ #define list_for_each_entry_from(pos, head, member) \ - for (; &pos->member != (head); \ + for (; !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** @@ -692,7 +701,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate backwards over list of given type, continuing from current position. */ #define list_for_each_entry_from_reverse(pos, head, member) \ - for (; &pos->member != (head); \ + for (; !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** @@ -705,7 +714,7 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each_entry_safe(pos, n, head, member) \ for (pos = list_first_entry(head, typeof(*pos), member), \ n = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** @@ -721,7 +730,7 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each_entry_safe_continue(pos, n, head, member) \ for (pos = list_next_entry(pos, member), \ n = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** @@ -736,7 +745,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_safe_from(pos, n, head, member) \ for (n = list_next_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** @@ -752,7 +761,7 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each_entry_safe_reverse(pos, n, head, member) \ for (pos = list_last_entry(head, typeof(*pos), member), \ n = list_prev_entry(pos, member); \ - &pos->member != (head); \ + !list_entry_is_head(pos, head, member); \ pos = n, n = list_prev_entry(n, member)) /** -- cgit v1.2.3 From a9eb63705e379f10a3c9d13fc6aee8b50805e862 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 15 Oct 2020 20:11:41 -0700 Subject: bitops: simplify get_count_order_long() These two cases could be unified into one. Signed-off-by: Wei Yang Signed-off-by: Andrew Morton Cc: Christian Brauner Cc: Andy Shevchenko Link: https://lkml.kernel.org/r/20200807085837.11697-2-richard.weiyang@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 99f2ac30b1d9..030a98f0c452 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -206,10 +206,7 @@ static inline int get_count_order_long(unsigned long l) { if (l == 0UL) return -1; - else if (l & (l - 1UL)) - return (int)fls_long(l); - else - return (int)fls_long(l) - 1; + return (int)fls_long(--l); } /** -- cgit v1.2.3 From 004fba1ae6ddd66ba0faa4f60c603b3ca77b3554 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 15 Oct 2020 20:11:46 -0700 Subject: bitops: use the same mechanism for get_count_order[_long] These two functions share the same logic. Signed-off-by: Wei Yang Signed-off-by: Andrew Morton Cc: Christian Brauner Cc: Andy Shevchenko Link: https://lkml.kernel.org/r/20200807085837.11697-3-richard.weiyang@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 030a98f0c452..5b74bdf159d6 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -188,12 +188,10 @@ static inline unsigned fls_long(unsigned long l) static inline int get_count_order(unsigned int count) { - int order; + if (count == 0) + return -1; - order = fls(count) - 1; - if (count & (count - 1)) - order++; - return order; + return fls(--count); } /** -- cgit v1.2.3 From afc63a97b764bc5a715762d0d9cc9785c2ef4e75 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 15 Oct 2020 20:12:46 -0700 Subject: coredump: refactor page range dumping into common helper Both fs/binfmt_elf.c and fs/binfmt_elf_fdpic.c need to dump ranges of pages into the coredump file. Extract that logic into a common helper. Signed-off-by: Jann Horn Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Cc: Christoph Hellwig Cc: Alexander Viro Cc: "Eric W . Biederman" Cc: Oleg Nesterov Cc: Hugh Dickins Link: http://lkml.kernel.org/r/20200827114932.3572699-4-jannh@google.com Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 7a899e83835d..f0b71a74d0bc 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -16,6 +16,8 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); extern void dump_truncate(struct coredump_params *cprm); +int dump_user_range(struct coredump_params *cprm, unsigned long start, + unsigned long len); #ifdef CONFIG_COREDUMP extern void do_coredump(const kernel_siginfo_t *siginfo); #else -- cgit v1.2.3 From 429a22e776a2b9f85a2b9c53d8e647598b553dd1 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 15 Oct 2020 20:12:50 -0700 Subject: coredump: rework elf/elf_fdpic vma_dump_size() into common helper At the moment, the binfmt_elf and binfmt_elf_fdpic code have slightly different code to figure out which VMAs should be dumped, and if so, whether the dump should contain the entire VMA or just its first page. Eliminate duplicate code by reworking the binfmt_elf version into a generic core dumping helper in coredump.c. As part of that, change the heuristic for detecting executable/library header pages to check whether the inode is executable instead of looking at the file mode. This is less problematic in terms of locking because it lets us avoid get_user() under the mmap_sem. (And arguably it looks nicer and makes more sense in generic code.) Adjust a little bit based on the binfmt_elf_fdpic version: ->anon_vma is only meaningful under CONFIG_MMU, otherwise we have to assume that the VMA has been written to. Suggested-by: Linus Torvalds Signed-off-by: Jann Horn Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Cc: Christoph Hellwig Cc: Alexander Viro Cc: "Eric W . Biederman" Cc: Oleg Nesterov Cc: Hugh Dickins Link: http://lkml.kernel.org/r/20200827114932.3572699-5-jannh@google.com Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index f0b71a74d0bc..bfecb8d79a7f 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -16,6 +16,7 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); extern void dump_truncate(struct coredump_params *cprm); +unsigned long vma_dump_size(struct vm_area_struct *vma, unsigned long mm_flags); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); #ifdef CONFIG_COREDUMP -- cgit v1.2.3 From a07279c9a8cd7dbd321640ff7210591599ee00a4 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 15 Oct 2020 20:12:54 -0700 Subject: binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot In both binfmt_elf and binfmt_elf_fdpic, use a new helper dump_vma_snapshot() to take a snapshot of the VMA list (including the gate VMA, if we have one) while protected by the mmap_lock, and then use that snapshot instead of walking the VMA list without locking. An alternative approach would be to keep the mmap_lock held across the entire core dumping operation; however, keeping the mmap_lock locked while we may be blocked for an unbounded amount of time (e.g. because we're dumping to a FUSE filesystem or so) isn't really optimal; the mmap_lock blocks things like the ->release handler of userfaultfd, and we don't really want critical system daemons to grind to a halt just because someone "gifted" them SCM_RIGHTS to an eternally-locked userfaultfd, or something like that. Since both the normal ELF code and the FDPIC ELF code need this functionality (and if any other binfmt wants to add coredump support in the future, they'd probably need it, too), implement this with a common helper in fs/coredump.c. A downside of this approach is that we now need a bigger amount of kernel memory per userspace VMA in the normal ELF case, and that we need O(n) kernel memory in the FDPIC ELF case at all; but 40 bytes per VMA shouldn't be terribly bad. There currently is a data race between stack expansion and anything that reads ->vm_start or ->vm_end under the mmap_lock held in read mode; to mitigate that for core dumping, take the mmap_lock in write mode when taking a snapshot of the VMA hierarchy. (If we only took the mmap_lock in read mode, we could end up with a corrupted core dump if someone does get_user_pages_remote() concurrently. Not really a major problem, but taking the mmap_lock either way works here, so we might as well avoid the issue.) (This doesn't do anything about the existing data races with stack expansion in other mm code.) Signed-off-by: Jann Horn Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Cc: Christoph Hellwig Cc: Alexander Viro Cc: "Eric W . Biederman" Cc: Oleg Nesterov Cc: Hugh Dickins Link: http://lkml.kernel.org/r/20200827114932.3572699-6-jannh@google.com Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index bfecb8d79a7f..e58e8c207782 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -7,6 +7,12 @@ #include #include +struct core_vma_metadata { + unsigned long start, end; + unsigned long flags; + unsigned long dump_size; +}; + /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. @@ -16,9 +22,11 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); extern void dump_truncate(struct coredump_params *cprm); -unsigned long vma_dump_size(struct vm_area_struct *vma, unsigned long mm_flags); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); +int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, + struct core_vma_metadata **vma_meta, + size_t *vma_data_size_ptr); #ifdef CONFIG_COREDUMP extern void do_coredump(const kernel_siginfo_t *siginfo); #else -- cgit v1.2.3 From 4d45e75a9955ade5c2f49bd96fc4173b2cec9a72 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 15 Oct 2020 20:13:00 -0700 Subject: mm: remove the now-unnecessary mmget_still_valid() hack The preceding patches have ensured that core dumping properly takes the mmap_lock. Thanks to that, we can now remove mmget_still_valid() and all its users. Signed-off-by: Jann Horn Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Cc: Christoph Hellwig Cc: Alexander Viro Cc: "Eric W . Biederman" Cc: Oleg Nesterov Cc: Hugh Dickins Link: http://lkml.kernel.org/r/20200827114932.3572699-8-jannh@google.com Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 15bfb06f2884..981e34cb1409 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } -/* - * This has to be called after a get_task_mm()/mmget_not_zero() - * followed by taking the mmap_lock for writing before modifying the - * vmas or anything the coredump pretends not to change from under it. - * - * It also has to be called when mmgrab() is used in the context of - * the process, but then the mm_count refcount is transferred outside - * the context of the process to run down_write() on that pinned mm. - * - * NOTE: find_extend_vma() called from GUP context is the only place - * that can modify the "mm" (notably the vm_start/end) under mmap_lock - * for reading and outside the context of the process, so it is also - * the only case that holds the mmap_lock for reading that must call - * this function. Generally if the mmap_lock is hold for reading - * there's no need of this check after get_task_mm()/mmget_not_zero(). - * - * This function can be obsoleted and the check can be removed, after - * the coredump code will hold the mmap_lock for writing before - * invoking the ->core_dump methods. - */ -static inline bool mmget_still_valid(struct mm_struct *mm) -{ - return likely(!mm->core_state); -} - /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. -- cgit v1.2.3 From 5cf53f3ce3b9ff5321b56f9ed9d90d59307be7d0 Mon Sep 17 00:00:00 2001 From: Elena Petrova Date: Thu, 15 Oct 2020 20:13:35 -0700 Subject: sched.h: drop in_ubsan field when UBSAN is in trap mode in_ubsan field of task_struct is only used in lib/ubsan.c, which in its turn is used only `ifneq ($(CONFIG_UBSAN_TRAP),y)`. Removing unnecessary field from a task_struct will help preserve the ABI between vanilla and CONFIG_UBSAN_TRAP'ed kernels. In particular, this will help enabling bounds sanitizer transparently for Android's GKI. Signed-off-by: Elena Petrova Signed-off-by: Andrew Morton Acked-by: Kees Cook Cc: Jann Horn Link: https://lkml.kernel.org/r/20200910134802.3160311-1-lenaptr@google.com Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9030f3abd969..063cd120b459 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1013,7 +1013,7 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif -#ifdef CONFIG_UBSAN +#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP) unsigned int in_ubsan; #endif -- cgit v1.2.3 From 2c739ced5886cd8c8361faa79a9522ec05174ed0 Mon Sep 17 00:00:00 2001 From: Albert van der Linde Date: Thu, 15 Oct 2020 20:13:46 -0700 Subject: lib, include/linux: add usercopy failure capability Patch series "add fault injection to user memory access", v3. The goal of this series is to improve testing of fault-tolerance in usages of user memory access functions, by adding support for fault injection. syzkaller/syzbot are using the existing fault injection modes and will use this particular feature also. The first patch adds failure injection capability for usercopy functions. The second changes usercopy functions to use this new failure capability (copy_from_user, ...). The third patch adds get/put/clear_user failures to x86. This patch (of 3): Add a failure injection capability to improve testing of fault-tolerance in usages of user memory access functions. Add CONFIG_FAULT_INJECTION_USERCOPY to enable faults in usercopy functions. The should_fail_usercopy function is to be called by these functions (copy_from_user, get_user, ...) in order to fail or not. Signed-off-by: Albert van der Linde Signed-off-by: Andrew Morton Reviewed-by: Akinobu Mita Reviewed-by: Alexander Potapenko Cc: Borislav Petkov Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Peter Zijlstra (Intel) Cc: "H. Peter Anvin" Cc: Al Viro Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Marco Elver Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200831171733.955393-1-alinde@google.com Link: http://lkml.kernel.org/r/20200831171733.955393-2-alinde@google.com Signed-off-by: Linus Torvalds --- include/linux/fault-inject-usercopy.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/fault-inject-usercopy.h (limited to 'include/linux') diff --git a/include/linux/fault-inject-usercopy.h b/include/linux/fault-inject-usercopy.h new file mode 100644 index 000000000000..56c3a693fdd9 --- /dev/null +++ b/include/linux/fault-inject-usercopy.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_FAULT_INJECT_USERCOPY_H__ +#define __LINUX_FAULT_INJECT_USERCOPY_H__ + +/* + * This header provides a wrapper for injecting failures to user space memory + * access functions. + */ + +#include + +#ifdef CONFIG_FAULT_INJECTION_USERCOPY + +bool should_fail_usercopy(void); + +#else + +static inline bool should_fail_usercopy(void) { return false; } + +#endif /* CONFIG_FAULT_INJECTION_USERCOPY */ + +#endif /* __LINUX_FAULT_INJECT_USERCOPY_H__ */ -- cgit v1.2.3 From 4d0e9df5e43dba52d38b251e3b909df8fa1110be Mon Sep 17 00:00:00 2001 From: Albert van der Linde Date: Thu, 15 Oct 2020 20:13:50 -0700 Subject: lib, uaccess: add failure injection to usercopy functions To test fault-tolerance of user memory access functions, introduce fault injection to usercopy functions. If a failure is expected return either -EFAULT or the total amount of bytes that were not copied. Signed-off-by: Albert van der Linde Signed-off-by: Andrew Morton Reviewed-by: Akinobu Mita Reviewed-by: Alexander Potapenko Cc: Al Viro Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Marco Elver Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200831171733.955393-3-alinde@google.com Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ef084eacaa7c..1b8c9d6162bc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,6 +2,7 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ +#include #include #include #include @@ -84,6 +85,8 @@ static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); @@ -105,6 +108,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + if (should_fail_usercopy()) + return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); @@ -114,6 +119,8 @@ static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); @@ -125,7 +132,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); - if (likely(access_ok(from, n))) { + if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } @@ -143,6 +150,8 @@ static inline __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); -- cgit v1.2.3 From 98447d65b4a7a59f8ea37dc6e5d743247d9a7b01 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Oct 2020 10:48:51 -0600 Subject: io_uring: move io identity items into separate struct io-wq contains a pointer to the identity, which we just hold in io_kiocb for now. This is in preparation for putting this outside io_kiocb. The only exception is struct files_struct, which we'll need different rules for to avoid a circular dependency. No functional changes in this patch. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 96315cfaf6d1..352aa6bbd36b 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -4,7 +4,18 @@ #include #include -#include + +struct io_identity { + struct files_struct *files; + struct mm_struct *mm; +#ifdef CONFIG_BLK_CGROUP + struct cgroup_subsys_state *blkcg_css; +#endif + const struct cred *creds; + struct nsproxy *nsproxy; + struct fs_struct *fs; + unsigned long fsize; +}; struct io_uring_task { /* submission side */ -- cgit v1.2.3 From 1e6fa5216a0e59ef02e8b6b40d553238a3b81d49 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 08:46:24 -0600 Subject: io_uring: COW io_identity on mismatch If the io_identity doesn't completely match the task, then create a copy of it and use that. The existing copy remains valid until the last user of it has gone away. This also changes the personality lookup to be indexed by io_identity, instead of creds directly. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 352aa6bbd36b..342cc574d5c0 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -15,6 +15,7 @@ struct io_identity { struct nsproxy *nsproxy; struct fs_struct *fs; unsigned long fsize; + refcount_t count; }; struct io_uring_task { -- cgit v1.2.3 From 5c3462cfd123b341c9d3c947c1a2bab373f1697f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 09:02:33 -0600 Subject: io_uring: store io_identity in io_uring_task This is, by definition, a per-task structure. So store it in the task context, instead of doing carrying it in each io_kiocb. We're being a bit inefficient if members have changed, as that requires an alloc and copy of a new io_identity struct. The next patch will fix that up. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 342cc574d5c0..bd3346194bca 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -24,6 +24,7 @@ struct io_uring_task { struct wait_queue_head wait; struct file *last; atomic_long_t req_issue; + struct io_identity identity; /* completion side */ bool in_idle ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 500a373d731ac506612db12631ec21295c1ff360 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 17:38:03 -0600 Subject: io_uring: assign new io_identity for task if members have changed This avoids doing a copy for each new async IO, if some parts of the io_identity has changed. We avoid reference counting for the normal fast path of nothing ever changing. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index bd3346194bca..607d14f61132 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -24,7 +24,8 @@ struct io_uring_task { struct wait_queue_head wait; struct file *last; atomic_long_t req_issue; - struct io_identity identity; + struct io_identity __identity; + struct io_identity *identity; /* completion side */ bool in_idle ____cacheline_aligned_in_smp; -- cgit v1.2.3 From d8a6df10aac9f2e4d5f30aff3129d552d2984ce7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 16:24:45 -0600 Subject: io_uring: use percpu counters to track inflight requests Even though we place the req_issued and req_complete in separate cachelines, there's considerable overhead in doing the atomics particularly on the completion side. Get rid of having the two counters, and just use a percpu_counter for this. That's what it was made for, after all. This considerably reduces the overhead in __io_free_req(). Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 607d14f61132..28939820b6b0 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -23,13 +23,10 @@ struct io_uring_task { struct xarray xa; struct wait_queue_head wait; struct file *last; - atomic_long_t req_issue; + struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; - - /* completion side */ - bool in_idle ____cacheline_aligned_in_smp; - atomic_long_t req_complete; + bool in_idle; }; #if defined(CONFIG_IO_URING) -- cgit v1.2.3 From 4ea33a976bfe79293965d0815e1914e4b6e58967 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2020 13:46:44 -0600 Subject: io-wq: inherit audit loginuid and sessionid Make sure the async io-wq workers inherit the loginuid and sessionid from the original task, and restore them to unset once we're done with the async work item. While at it, disable the ability for kernel threads to write to their own loginuid. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 28939820b6b0..868364cea3b7 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -15,6 +15,10 @@ struct io_identity { struct nsproxy *nsproxy; struct fs_struct *fs; unsigned long fsize; +#ifdef CONFIG_AUDIT + kuid_t loginuid; + unsigned int sessionid; +#endif refcount_t count; }; -- cgit v1.2.3 From 3c532798ec96b6c2d77706f04ed1d8b566a805df Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 3 Oct 2020 10:49:22 -0600 Subject: tracehook: clear TIF_NOTIFY_RESUME in tracehook_notify_resume() All the callers currently do this, clean it up and move the clearing into tracehook_notify_resume() instead. Reviewed-by: Oleg Nesterov Reviewed-by: Thomas Gleixner Signed-off-by: Jens Axboe --- include/linux/tracehook.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 36fb3bbed6b2..b480e1a07ed8 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -178,9 +178,9 @@ static inline void set_notify_resume(struct task_struct *task) */ static inline void tracehook_notify_resume(struct pt_regs *regs) { + clear_thread_flag(TIF_NOTIFY_RESUME); /* - * The caller just cleared TIF_NOTIFY_RESUME. This barrier - * pairs with task_work_add()->set_notify_resume() after + * This barrier pairs with task_work_add()->set_notify_resume() after * hlist_add_head(task->task_works); */ smp_mb__after_atomic(); -- cgit v1.2.3 From 91989c707884ecc7cd537281ab1a4b8fb7219da3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Oct 2020 09:02:26 -0600 Subject: task_work: cleanup notification modes A previous commit changed the notification mode from true/false to an int, allowing notify-no, notify-yes, or signal-notify. This was backwards compatible in the sense that any existing true/false user would translate to either 0 (on notification sent) or 1, the latter which mapped to TWA_RESUME. TWA_SIGNAL was assigned a value of 2. Clean this up properly, and define a proper enum for the notification mode. Now we have: - TWA_NONE. This is 0, same as before the original change, meaning no notification requested. - TWA_RESUME. This is 1, same as before the original change, meaning that we use TIF_NOTIFY_RESUME. - TWA_SIGNAL. This uses TIF_SIGPENDING/JOBCTL_TASK_WORK for the notification. Clean up all the callers, switching their 0/1/false/true to using the appropriate TWA_* mode for notifications. Fixes: e91b48162332 ("task_work: teach task_work_add() to do signal_wake_up()") Reviewed-by: Thomas Gleixner Signed-off-by: Jens Axboe --- include/linux/task_work.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 0fb93aafa478..0d848a1e9e62 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -13,9 +13,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func) twork->func = func; } -#define TWA_RESUME 1 -#define TWA_SIGNAL 2 -int task_work_add(struct task_struct *task, struct callback_head *twork, int); +enum task_work_notify_mode { + TWA_NONE, + TWA_RESUME, + TWA_SIGNAL, +}; + +int task_work_add(struct task_struct *task, struct callback_head *twork, + enum task_work_notify_mode mode); struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); -- cgit v1.2.3 From 15a119e09344a346384ec05c781c126a29b18235 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Wed, 23 Sep 2020 01:12:31 +0800 Subject: jbd2: fix the comment of struct jbd2_journal_handle the struct name was modified long ago, but the comment still use struct handle_s. Signed-off-by: Hui Su Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200922171231.GA53120@rlk Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 08f904943ab2..a1ef05412acf 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -452,8 +452,8 @@ struct jbd2_inode { struct jbd2_revoke_table_s; /** - * struct handle_s - The handle_s type is the concrete type associated with - * handle_t. + * struct jbd2_journal_handle - The jbd2_journal_handle type is the concrete + * type associated with handle_t. * @h_transaction: Which compound transaction is this update a part of? * @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_rsv_handle: Handle reserved for finishing the logical operation. -- cgit v1.2.3 From aa3c0c61f62d682259e3e66cdc01846290f9cd6c Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 5 Oct 2020 21:48:38 -0300 Subject: jbd2: introduce/export functions jbd2_journal_submit|finish_inode_data_buffers() Export functions that implement the current behavior done for an inode in journal_submit|finish_inode_data_buffers(). No functional change. Signed-off-by: Mauricio Faria de Oliveira Suggested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201006004841.600488-2-mfo@canonical.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a1ef05412acf..8b7b06066bc2 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1421,6 +1421,10 @@ extern int jbd2_journal_inode_ranged_write(handle_t *handle, extern int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); +extern int jbd2_journal_submit_inode_data_buffers( + struct jbd2_inode *jinode); +extern int jbd2_journal_finish_inode_data_buffers( + struct jbd2_inode *jinode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); -- cgit v1.2.3 From 342af94ec6c02aa478fe2adcd41b950e154b03ba Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 5 Oct 2020 21:48:39 -0300 Subject: jbd2, ext4, ocfs2: introduce/use journal callbacks j_submit|finish_inode_data_buffers() Introduce journal callbacks to allow different behaviors for an inode in journal_submit|finish_inode_data_buffers(). The existing users of the current behavior (ext4, ocfs2) are adapted to use the previously exported functions that implement the current behavior. Users are callers of jbd2_journal_inode_ranged_write|wait(), which adds the inode to the transaction's inode list with the JI_WRITE|WAIT_DATA flags. Only ext4 and ocfs2 in-tree. Both CONFIG_EXT4_FS and CONFIG_OCSFS2_FS select CONFIG_JBD2, which builds fs/jbd2/commit.c and journal.c that define and export the functions, so we can call directly in ext4/ocfs2. Signed-off-by: Mauricio Faria de Oliveira Suggested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201006004841.600488-3-mfo@canonical.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8b7b06066bc2..04afa6dcd60d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -629,7 +629,9 @@ struct transaction_s struct journal_head *t_shadow_list; /* - * List of inodes whose data we've modified in data=ordered mode. + * List of inodes associated with the transaction; e.g., ext4 uses + * this to track inodes in data=ordered and data=journal mode that + * need special handling on transaction commit; also used by ocfs2. * [j_list_lock] */ struct list_head t_inode_list; @@ -1111,6 +1113,27 @@ struct journal_s void (*j_commit_callback)(journal_t *, transaction_t *); + /** + * @j_submit_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WRITE_DATA flag + * before we start to write out the transaction to the journal. + */ + int (*j_submit_inode_data_buffers) + (struct jbd2_inode *); + + /** + * @j_finish_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WAIT_DATA flag + * after we have written the transaction to the journal + * but before we write out the commit block. + */ + int (*j_finish_inode_data_buffers) + (struct jbd2_inode *); + /* * Journal statistics */ -- cgit v1.2.3 From b87d8cefe43c7f22e8aa13919c1dfa2b4b4b4e01 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 17 Oct 2020 16:13:40 -0700 Subject: mm, memcg: rework remote charging API to support nesting Currently the remote memcg charging API consists of two functions: memalloc_use_memcg() and memalloc_unuse_memcg(), which set and clear the memcg value, which overwrites the memcg of the current task. memalloc_use_memcg(target_memcg); <...> memalloc_unuse_memcg(); It works perfectly for allocations performed from a normal context, however an attempt to call it from an interrupt context or just nest two remote charging blocks will lead to an incorrect accounting. On exit from the inner block the active memcg will be cleared instead of being restored. memalloc_use_memcg(target_memcg); memalloc_use_memcg(target_memcg_2); <...> memalloc_unuse_memcg(); Error: allocation here are charged to the memcg of the current process instead of target_memcg. memalloc_unuse_memcg(); This patch extends the remote charging API by switching to a single function: struct mem_cgroup *set_active_memcg(struct mem_cgroup *memcg), which sets the new value and returns the old one. So a remote charging block will look like: old_memcg = set_active_memcg(target_memcg); <...> set_active_memcg(old_memcg); This patch is heavily based on the patch by Johannes Weiner, which can be found here: https://lkml.org/lkml/2020/5/28/806 . Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Cc: Johannes Weiner Cc: Dan Schatzberg Link: https://lkml.kernel.org/r/20200821212056.3769116-1-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 981e34cb1409..1a80fb128e74 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -280,38 +280,28 @@ static inline void memalloc_nocma_restore(unsigned int flags) #ifdef CONFIG_MEMCG /** - * memalloc_use_memcg - Starts the remote memcg charging scope. + * set_active_memcg - Starts the remote memcg charging scope. * @memcg: memcg to charge. * * This function marks the beginning of the remote memcg charging scope. All the * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * - * NOTE: This function is not nesting safe. + * NOTE: This function can nest. Users must save the return value and + * reset the previous value after their own charging scope is over. */ -static inline void memalloc_use_memcg(struct mem_cgroup *memcg) +static inline struct mem_cgroup * +set_active_memcg(struct mem_cgroup *memcg) { - WARN_ON_ONCE(current->active_memcg); + struct mem_cgroup *old = current->active_memcg; current->active_memcg = memcg; -} - -/** - * memalloc_unuse_memcg - Ends the remote memcg charging scope. - * - * This function marks the end of the remote memcg charging scope started by - * memalloc_use_memcg(). - */ -static inline void memalloc_unuse_memcg(void) -{ - current->active_memcg = NULL; + return old; } #else -static inline void memalloc_use_memcg(struct mem_cgroup *memcg) -{ -} - -static inline void memalloc_unuse_memcg(void) +static inline struct mem_cgroup * +set_active_memcg(struct mem_cgroup *memcg) { + return NULL; } #endif -- cgit v1.2.3 From 37d5985c003daab138a72dd4af9853b396d91c26 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 17 Oct 2020 16:13:50 -0700 Subject: mm: kmem: prepare remote memcg charging infra for interrupt contexts Remote memcg charging API uses current->active_memcg to store the currently active memory cgroup, which overwrites the memory cgroup of the current process. It works well for normal contexts, but doesn't work for interrupt contexts: indeed, if an interrupt occurs during the execution of a section with an active memcg set, all allocations inside the interrupt will be charged to the active memcg set (given that we'll enable accounting for allocations from an interrupt context). But because the interrupt might have no relation to the active memcg set outside, it's obviously wrong from the accounting prospective. To resolve this problem, let's add a global percpu int_active_memcg variable, which will be used to store an active memory cgroup which will be used from interrupt contexts. set_active_memcg() will transparently use current->active_memcg or int_active_memcg depending on the context. To make the read part simple and transparent for the caller, let's introduce two new functions: - struct mem_cgroup *active_memcg(void), - struct mem_cgroup *get_active_memcg(void). They are returning the active memcg if it's set, hiding all implementation details: where to get it depending on the current context. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Link: http://lkml.kernel.org/r/20200827225843.1270629-4-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1a80fb128e74..d5ece7a9a403 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -279,6 +279,7 @@ static inline void memalloc_nocma_restore(unsigned int flags) #endif #ifdef CONFIG_MEMCG +DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); /** * set_active_memcg - Starts the remote memcg charging scope. * @memcg: memcg to charge. @@ -293,8 +294,16 @@ static inline void memalloc_nocma_restore(unsigned int flags) static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { - struct mem_cgroup *old = current->active_memcg; - current->active_memcg = memcg; + struct mem_cgroup *old; + + if (in_interrupt()) { + old = this_cpu_read(int_active_memcg); + this_cpu_write(int_active_memcg, memcg); + } else { + old = current->active_memcg; + current->active_memcg = memcg; + } + return old; } #else -- cgit v1.2.3 From 4127c6504f25c4fcff52dc996efda2ef859dd661 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 17 Oct 2020 16:13:53 -0700 Subject: mm: kmem: enable kernel memcg accounting from interrupt contexts If a memcg to charge can be determined (using remote charging API), there are no reasons to exclude allocations made from an interrupt context from the accounting. Such allocations will pass even if the resulting memcg size will exceed the hard limit, but it will affect the application of the memory pressure and an inability to put the workload under the limit will eventually trigger the OOM. To use active_memcg() helper, memcg_kmem_bypass() is moved back to memcontrol.c. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Link: http://lkml.kernel.org/r/20200827225843.1270629-5-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6ef4a552e09d..e391e3c56de5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1531,18 +1531,6 @@ static inline bool memcg_kmem_enabled(void) return static_branch_likely(&memcg_kmem_enabled_key); } -static inline bool memcg_kmem_bypass(void) -{ - if (in_interrupt()) - return true; - - /* Allow remote memcg charging in kthread contexts. */ - if ((!current->mm || (current->flags & PF_KTHREAD)) && - !current->active_memcg) - return true; - return false; -} - static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) { -- cgit v1.2.3 From 0726b01e70455f9900ab524117c7b520d197dc8c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 17 Oct 2020 16:14:50 -0700 Subject: mm/madvise: pass mm to do_madvise Patch series "introduce memory hinting API for external process", v9. Now, we have MADV_PAGEOUT and MADV_COLD as madvise hinting API. With that, application could give hints to kernel what memory range are preferred to be reclaimed. However, in some platform(e.g., Android), the information required to make the hinting decision is not known to the app. Instead, it is known to a centralized userspace daemon(e.g., ActivityManagerService), and that daemon must be able to initiate reclaim on its own without any app involvement. To solve the concern, this patch introduces new syscall - process_madvise(2). Bascially, it's same with madvise(2) syscall but it has some differences. 1. It needs pidfd of target process to provide the hint 2. It supports only MADV_{COLD|PAGEOUT|MERGEABLE|UNMEREABLE} at this moment. Other hints in madvise will be opened when there are explicit requests from community to prevent unexpected bugs we couldn't support. 3. Only privileged processes can do something for other process's address space. For more detail of the new API, please see "mm: introduce external memory hinting API" description in this patchset. This patch (of 3): In upcoming patches, do_madvise will be called from external process context so we shouldn't asssume "current" is always hinted process's task_struct. Furthermore, we must not access mm_struct via task->mm, but obtain it via access_mm() once (in the following patch) and only use that pointer [1], so pass it to do_madvise() as well. Note the vma->vm_mm pointers are safe, so we can use them further down the call stack. And let's pass current->mm as arguments of do_madvise so it shouldn't change existing behavior but prepare next patch to make review easy. [vbabka@suse.cz: changelog tweak] [minchan@kernel.org: use current->mm for io_uring] Link: http://lkml.kernel.org/r/20200423145215.72666-1-minchan@kernel.org [akpm@linux-foundation.org: fix it for upstream changes] [akpm@linux-foundation.org: whoops] [rdunlap@infradead.org: add missing includes] Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Reviewed-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Acked-by: David Rientjes Cc: Jens Axboe Cc: Jann Horn Cc: Tim Murray Cc: Daniel Colascione Cc: Sandeep Patil Cc: Sonny Rao Cc: Brian Geffon Cc: Michal Hocko Cc: Johannes Weiner Cc: Shakeel Butt Cc: John Dias Cc: Joel Fernandes Cc: Alexander Duyck Cc: SeongJae Park Cc: Christian Brauner Cc: Kirill Tkhai Cc: Oleksandr Natalenko Cc: SeongJae Park Cc: Christian Brauner Cc: Florian Weimer Cc: Link: https://lkml.kernel.org/r/20200901000633.1920247-1-minchan@kernel.org Link: http://lkml.kernel.org/r/20200622192900.22757-1-minchan@kernel.org Link: http://lkml.kernel.org/r/20200302193630.68771-2-minchan@kernel.org Link: http://lkml.kernel.org/r/20200622192900.22757-2-minchan@kernel.org Link: https://lkml.kernel.org/r/20200901000633.1920247-2-minchan@kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 61a2633fcc7f..ef360fe70aaf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2579,7 +2579,7 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); -extern int do_madvise(unsigned long start, size_t len_in, int behavior); +extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); #ifdef CONFIG_MMU extern int __mm_populate(unsigned long addr, unsigned long len, -- cgit v1.2.3 From 1aa92cd31c1c032ddfed27e79d646bbb429e9b52 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 17 Oct 2020 16:14:54 -0700 Subject: pid: move pidfd_get_pid() to pid.c process_madvise syscall needs pidfd_get_pid function to translate pidfd to pid so this patch move the function to kernel/pid.c. Suggested-by: Alexander Duyck Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Reviewed-by: Suren Baghdasaryan Reviewed-by: Alexander Duyck Reviewed-by: Vlastimil Babka Acked-by: Christian Brauner Acked-by: David Rientjes Cc: Jens Axboe Cc: Jann Horn Cc: Brian Geffon Cc: Daniel Colascione Cc: Joel Fernandes Cc: Johannes Weiner Cc: John Dias Cc: Kirill Tkhai Cc: Michal Hocko Cc: Oleksandr Natalenko Cc: Sandeep Patil Cc: SeongJae Park Cc: SeongJae Park Cc: Shakeel Butt Cc: Sonny Rao Cc: Tim Murray Cc: Christian Brauner Cc: Florian Weimer Cc: Link: http://lkml.kernel.org/r/20200302193630.68771-5-minchan@kernel.org Link: http://lkml.kernel.org/r/20200622192900.22757-3-minchan@kernel.org Link: https://lkml.kernel.org/r/20200901000633.1920247-3-minchan@kernel.org Signed-off-by: Linus Torvalds --- include/linux/pid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 176d6cf80e7c..fa10acb8d6a4 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -77,6 +77,7 @@ extern const struct file_operations pidfd_fops; struct file; extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); static inline struct pid *get_pid(struct pid *pid) { -- cgit v1.2.3 From ecb8ac8b1f146915aa6b96449b66dd48984caacc Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 17 Oct 2020 16:14:59 -0700 Subject: mm/madvise: introduce process_madvise() syscall: an external memory hinting API There is usecase that System Management Software(SMS) want to give a memory hint like MADV_[COLD|PAGEEOUT] to other processes and in the case of Android, it is the ActivityManagerService. The information required to make the reclaim decision is not known to the app. Instead, it is known to the centralized userspace daemon(ActivityManagerService), and that daemon must be able to initiate reclaim on its own without any app involvement. To solve the issue, this patch introduces a new syscall process_madvise(2). It uses pidfd of an external process to give the hint. It also supports vector address range because Android app has thousands of vmas due to zygote so it's totally waste of CPU and power if we should call the syscall one by one for each vma.(With testing 2000-vma syscall vs 1-vector syscall, it showed 15% performance improvement. I think it would be bigger in real practice because the testing ran very cache friendly environment). Another potential use case for the vector range is to amortize the cost ofTLB shootdowns for multiple ranges when using MADV_DONTNEED; this could benefit users like TCP receive zerocopy and malloc implementations. In future, we could find more usecases for other advises so let's make it happens as API since we introduce a new syscall at this moment. With that, existing madvise(2) user could replace it with process_madvise(2) with their own pid if they want to have batch address ranges support feature. ince it could affect other process's address range, only privileged process(PTRACE_MODE_ATTACH_FSCREDS) or something else(e.g., being the same UID) gives it the right to ptrace the process could use it successfully. The flag argument is reserved for future use if we need to extend the API. I think supporting all hints madvise has/will supported/support to process_madvise is rather risky. Because we are not sure all hints make sense from external process and implementation for the hint may rely on the caller being in the current context so it could be error-prone. Thus, I just limited hints as MADV_[COLD|PAGEOUT] in this patch. If someone want to add other hints, we could hear the usecase and review it for each hint. It's safer for maintenance rather than introducing a buggy syscall but hard to fix it later. So finally, the API is as follows, ssize_t process_madvise(int pidfd, const struct iovec *iovec, unsigned long vlen, int advice, unsigned int flags); DESCRIPTION The process_madvise() system call is used to give advice or directions to the kernel about the address ranges from external process as well as local process. It provides the advice to address ranges of process described by iovec and vlen. The goal of such advice is to improve system or application performance. The pidfd selects the process referred to by the PID file descriptor specified in pidfd. (See pidofd_open(2) for further information) The pointer iovec points to an array of iovec structures, defined in as: struct iovec { void *iov_base; /* starting address */ size_t iov_len; /* number of bytes to be advised */ }; The iovec describes address ranges beginning at address(iov_base) and with size length of bytes(iov_len). The vlen represents the number of elements in iovec. The advice is indicated in the advice argument, which is one of the following at this moment if the target process specified by pidfd is external. MADV_COLD MADV_PAGEOUT Permission to provide a hint to external process is governed by a ptrace access mode PTRACE_MODE_ATTACH_FSCREDS check; see ptrace(2). The process_madvise supports every advice madvise(2) has if target process is in same thread group with calling process so user could use process_madvise(2) to extend existing madvise(2) to support vector address ranges. RETURN VALUE On success, process_madvise() returns the number of bytes advised. This return value may be less than the total number of requested bytes, if an error occurred. The caller should check return value to determine whether a partial advice occurred. FAQ: Q.1 - Why does any external entity have better knowledge? Quote from Sandeep "For Android, every application (including the special SystemServer) are forked from Zygote. The reason of course is to share as many libraries and classes between the two as possible to benefit from the preloading during boot. After applications start, (almost) all of the APIs end up calling into this SystemServer process over IPC (binder) and back to the application. In a fully running system, the SystemServer monitors every single process periodically to calculate their PSS / RSS and also decides which process is "important" to the user for interactivity. So, because of how these processes start _and_ the fact that the SystemServer is looping to monitor each process, it does tend to *know* which address range of the application is not used / useful. Besides, we can never rely on applications to clean things up themselves. We've had the "hey app1, the system is low on memory, please trim your memory usage down" notifications for a long time[1]. They rely on applications honoring the broadcasts and very few do. So, if we want to avoid the inevitable killing of the application and restarting it, some way to be able to tell the OS about unimportant memory in these applications will be useful. - ssp Q.2 - How to guarantee the race(i.e., object validation) between when giving a hint from an external process and get the hint from the target process? process_madvise operates on the target process's address space as it exists at the instant that process_madvise is called. If the space target process can run between the time the process_madvise process inspects the target process address space and the time that process_madvise is actually called, process_madvise may operate on memory regions that the calling process does not expect. It's the responsibility of the process calling process_madvise to close this race condition. For example, the calling process can suspend the target process with ptrace, SIGSTOP, or the freezer cgroup so that it doesn't have an opportunity to change its own address space before process_madvise is called. Another option is to operate on memory regions that the caller knows a priori will be unchanged in the target process. Yet another option is to accept the race for certain process_madvise calls after reasoning that mistargeting will do no harm. The suggested API itself does not provide synchronization. It also apply other APIs like move_pages, process_vm_write. The race isn't really a problem though. Why is it so wrong to require that callers do their own synchronization in some manner? Nobody objects to write(2) merely because it's possible for two processes to open the same file and clobber each other's writes --- instead, we tell people to use flock or something. Think about mmap. It never guarantees newly allocated address space is still valid when the user tries to access it because other threads could unmap the memory right before. That's where we need synchronization by using other API or design from userside. It shouldn't be part of API itself. If someone needs more fine-grained synchronization rather than process level, there were two ideas suggested - cookie[2] and anon-fd[3]. Both are applicable via using last reserved argument of the API but I don't think it's necessary right now since we have already ways to prevent the race so don't want to add additional complexity with more fine-grained optimization model. To make the API extend, it reserved an unsigned long as last argument so we could support it in future if someone really needs it. Q.3 - Why doesn't ptrace work? Injecting an madvise in the target process using ptrace would not work for us because such injected madvise would have to be executed by the target process, which means that process would have to be runnable and that creates the risk of the abovementioned race and hinting a wrong VMA. Furthermore, we want to act the hint in caller's context, not the callee's, because the callee is usually limited in cpuset/cgroups or even freezed state so they can't act by themselves quick enough, which causes more thrashing/kill. It doesn't work if the target process are ptraced(e.g., strace, debugger, minidump) because a process can have at most one ptracer. [1] https://developer.android.com/topic/performance/memory" [2] process_getinfo for getting the cookie which is updated whenever vma of process address layout are changed - Daniel Colascione - https://lore.kernel.org/lkml/20190520035254.57579-1-minchan@kernel.org/T/#m7694416fd179b2066a2c62b5b139b14e3894e224 [3] anonymous fd which is used for the object(i.e., address range) validation - Michal Hocko - https://lore.kernel.org/lkml/20200120112722.GY18451@dhcp22.suse.cz/ [minchan@kernel.org: fix process_madvise build break for arm64] Link: http://lkml.kernel.org/r/20200303145756.GA219683@google.com [minchan@kernel.org: fix build error for mips of process_madvise] Link: http://lkml.kernel.org/r/20200508052517.GA197378@google.com [akpm@linux-foundation.org: fix patch ordering issue] [akpm@linux-foundation.org: fix arm64 whoops] [minchan@kernel.org: make process_madvise() vlen arg have type size_t, per Florian] [akpm@linux-foundation.org: fix i386 build] [sfr@canb.auug.org.au: fix syscall numbering] Link: https://lkml.kernel.org/r/20200905142639.49fc3f1a@canb.auug.org.au [sfr@canb.auug.org.au: madvise.c needs compat.h] Link: https://lkml.kernel.org/r/20200908204547.285646b4@canb.auug.org.au [minchan@kernel.org: fix mips build] Link: https://lkml.kernel.org/r/20200909173655.GC2435453@google.com [yuehaibing@huawei.com: remove duplicate header which is included twice] Link: https://lkml.kernel.org/r/20200915121550.30584-1-yuehaibing@huawei.com [minchan@kernel.org: do not use helper functions for process_madvise] Link: https://lkml.kernel.org/r/20200921175539.GB387368@google.com [akpm@linux-foundation.org: pidfd_get_pid() gained an argument] [sfr@canb.auug.org.au: fix up for "iov_iter: transparently handle compat iovecs in import_iovec"] Link: https://lkml.kernel.org/r/20200928212542.468e1fef@canb.auug.org.au Signed-off-by: Minchan Kim Signed-off-by: YueHaibing Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Reviewed-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Acked-by: David Rientjes Cc: Alexander Duyck Cc: Brian Geffon Cc: Christian Brauner Cc: Daniel Colascione Cc: Jann Horn Cc: Jens Axboe Cc: Joel Fernandes Cc: Johannes Weiner Cc: John Dias Cc: Kirill Tkhai Cc: Michal Hocko Cc: Oleksandr Natalenko Cc: Sandeep Patil Cc: SeongJae Park Cc: SeongJae Park Cc: Shakeel Butt Cc: Sonny Rao Cc: Tim Murray Cc: Christian Brauner Cc: Florian Weimer Cc: Link: http://lkml.kernel.org/r/20200302193630.68771-3-minchan@kernel.org Link: http://lkml.kernel.org/r/20200508183320.GA125527@google.com Link: http://lkml.kernel.org/r/20200622192900.22757-4-minchan@kernel.org Link: https://lkml.kernel.org/r/20200901000633.1920247-4-minchan@kernel.org Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 06db09875aa4..2eda7678fe1d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -879,6 +879,8 @@ asmlinkage long sys_munlockall(void); asmlinkage long sys_mincore(unsigned long start, size_t len, unsigned char __user * vec); asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); +asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, + size_t vlen, int behavior, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); -- cgit v1.2.3 From b944afc9d64ddf1b6a152c23ff86bf26e1fd430c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 17 Oct 2020 16:15:06 -0700 Subject: mm: add a VM_MAP_PUT_PAGES flag for vmap Add a flag so that vmap takes ownership of the passed in page array. When vfree is called on such an allocation it will put one reference on each page, and free the page array itself. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Cc: Boris Ostrovsky Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Matthew Auld Cc: "Matthew Wilcox (Oracle)" Cc: Minchan Kim Cc: Nitin Gupta Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Stefano Stabellini Cc: Tvrtko Ursulin Cc: Uladzislau Rezki (Sony) Link: https://lkml.kernel.org/r/20201002122204.1534411-3-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0221f852a7e1..b899681e3ff9 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -24,6 +24,7 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ +#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */ /* * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. -- cgit v1.2.3 From 3e9a9e256b1e1e6e8f19faf76fa9c37578ae35ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 17 Oct 2020 16:15:10 -0700 Subject: mm: add a vmap_pfn function Add a proper helper to remap PFNs into kernel virtual space so that drivers don't have to abuse alloc_vm_area and open coded PTE manipulation for it. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Cc: Boris Ostrovsky Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Matthew Auld Cc: "Matthew Wilcox (Oracle)" Cc: Minchan Kim Cc: Nitin Gupta Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Stefano Stabellini Cc: Tvrtko Ursulin Cc: Uladzislau Rezki (Sony) Link: https://lkml.kernel.org/r/20201002122204.1534411-4-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b899681e3ff9..c77efeac2425 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -122,6 +122,7 @@ extern void vfree_atomic(const void *addr); extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); +void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot); extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, -- cgit v1.2.3 From 301fa9f2ddf7fb248c188af292c9cc04f8283dff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 17 Oct 2020 16:15:39 -0700 Subject: mm: remove alloc_vm_area All users are gone now. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Cc: Boris Ostrovsky Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Matthew Auld Cc: "Matthew Wilcox (Oracle)" Cc: Minchan Kim Cc: Nitin Gupta Cc: Peter Zijlstra Cc: Rodrigo Vivi Cc: Stefano Stabellini Cc: Tvrtko Ursulin Cc: Uladzislau Rezki (Sony) Link: https://lkml.kernel.org/r/20201002122204.1534411-12-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c77efeac2425..938eaf9517e2 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -169,6 +169,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller); +void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); @@ -204,10 +205,6 @@ static inline void set_vm_flush_reset_perms(void *addr) } #endif -/* Allocate/destroy a 'vmalloc' VM area. */ -extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes); -extern void free_vm_area(struct vm_struct *area); - /* for /dev/kmem */ extern long vread(char *buf, char *addr, unsigned long count); extern long vwrite(char *buf, char *addr, unsigned long count); -- cgit v1.2.3 From 695cebe58dcf3d9802cdfa9c327b5c7641a5914b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Oct 2020 10:41:07 +0200 Subject: dma-mapping: move more functions to dma-map-ops.h Due to a mismerge a bunch of prototypes that should have moved to dma-map-ops.h are still in dma-mapping.h, fix that up. Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 23 +++++++++++++++++++++++ include/linux/dma-mapping.h | 24 ------------------------ 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 8029f7e04145..a5f89fc4d6df 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -203,6 +203,29 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_DECLARE_COHERENT */ +int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +struct page *dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_common_free_pages(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + +struct page **dma_common_find_pages(void *cpu_addr); +void *dma_common_contiguous_remap(struct page *page, size_t size, pgprot_t prot, + const void *caller); +void *dma_common_pages_remap(struct page **pages, size_t size, pgprot_t prot, + const void *caller); +void dma_common_free_remap(void *cpu_addr, size_t size); + +struct page *dma_alloc_from_pool(struct device *dev, size_t size, + void **cpu_addr, gfp_t flags, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)); +bool dma_free_from_pool(struct device *dev, void *start, size_t size); + #ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H #include #elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 3f029afdc9dc..956151052d45 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -389,30 +389,6 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) -extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs); -struct page *dma_common_alloc_pages(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); -void dma_common_free_pages(struct device *dev, size_t size, struct page *vaddr, - dma_addr_t dma_handle, enum dma_data_direction dir); -struct page **dma_common_find_pages(void *cpu_addr); -void *dma_common_contiguous_remap(struct page *page, size_t size, - pgprot_t prot, const void *caller); - -void *dma_common_pages_remap(struct page **pages, size_t size, - pgprot_t prot, const void *caller); -void dma_common_free_remap(void *cpu_addr, size_t size); - -struct page *dma_alloc_from_pool(struct device *dev, size_t size, - void **cpu_addr, gfp_t flags, - bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)); -bool dma_free_from_pool(struct device *dev, void *start, size_t size); - -int -dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, - dma_addr_t dma_addr, size_t size, unsigned long attrs); - static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { -- cgit v1.2.3 From cb3a92da231bcf55c243d00fa619ee36281b0001 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 20 Oct 2020 05:22:56 -0400 Subject: block: remove unused members for io_context After removing blk-sq code, there is no user of nr_batch_requests and last_waited in kernel. Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 1dcd9198beb7..0a9dc40b7be8 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -106,12 +106,6 @@ struct io_context { unsigned short ioprio; - /* - * For request batching - */ - int nr_batch_requests; /* Number of requests left in the batch */ - unsigned long last_waited; /* Time last woken after wait for request */ - struct radix_tree_root icq_tree; struct io_cq __rcu *icq_hint; struct hlist_head icq_list; -- cgit v1.2.3 From aa9c9b3f3f08cb0fda8a8139e6fb302c9a2e21ed Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Tue, 20 Oct 2020 17:00:27 +0200 Subject: PM: runtime: Fix typo in pm_runtime_set_active() helper comment This patch is to fix typo in the comment of helper pm_runtime_set_active(). Signed-off-by: Bean Huo [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 6245caa18034..18b02dcc168e 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -479,7 +479,7 @@ static inline int pm_runtime_set_active(struct device *dev) } /** - * pm_runtime_set_suspended - Set runtime PM status to "active". + * pm_runtime_set_suspended - Set runtime PM status to "suspended". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_SUSPENDED and ensure that -- cgit v1.2.3 From 0cfcd405e758ba1d277e58436fb32f06888c3e41 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Sun, 18 Oct 2020 23:42:49 -0400 Subject: NFSv4.2: Fix NFS4ERR_STALE error when doing inter server copy NFS_FS=y as dependency of CONFIG_NFSD_V4_2_INTER_SSC still have build errors and some configs with NFSD=m to get NFS4ERR_STALE error when doing inter server copy. Added ops table in nfs_common for knfsd to access NFS client modules. Fixes: 3ac3711adb88 ("NFSD: Fix NFS server build errors") Signed-off-by: Dai Ngo Signed-off-by: J. Bruce Fields --- include/linux/nfs_ssc.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 include/linux/nfs_ssc.h (limited to 'include/linux') diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h new file mode 100644 index 000000000000..f5ba0fbff72f --- /dev/null +++ b/include/linux/nfs_ssc.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/nfs_ssc.h + * + * Author: Dai Ngo + * + * Copyright (c) 2020, Oracle and/or its affiliates. + */ + +#include + +extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl; + +/* + * NFS_V4 + */ +struct nfs4_ssc_client_ops { + struct file *(*sco_open)(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, nfs4_stateid *stateid); + void (*sco_close)(struct file *filep); +}; + +/* + * NFS_FS + */ +struct nfs_ssc_client_ops { + void (*sco_sb_deactive)(struct super_block *sb); +}; + +struct nfs_ssc_client_ops_tbl { + const struct nfs4_ssc_client_ops *ssc_nfs4_ops; + const struct nfs_ssc_client_ops *ssc_nfs_ops; +}; + +extern void nfs42_ssc_register_ops(void); +extern void nfs42_ssc_unregister_ops(void); + +extern void nfs42_ssc_register(const struct nfs4_ssc_client_ops *ops); +extern void nfs42_ssc_unregister(const struct nfs4_ssc_client_ops *ops); + +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +static inline struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, nfs4_stateid *stateid) +{ + if (nfs_ssc_client_tbl.ssc_nfs4_ops) + return (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_open)(ss_mnt, src_fh, stateid); + return ERR_PTR(-EIO); +} + +static inline void nfs42_ssc_close(struct file *filep) +{ + if (nfs_ssc_client_tbl.ssc_nfs4_ops) + (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep); +} +#endif + +/* + * NFS_FS + */ +extern void nfs_ssc_register(const struct nfs_ssc_client_ops *ops); +extern void nfs_ssc_unregister(const struct nfs_ssc_client_ops *ops); + +static inline void nfs_do_sb_deactive(struct super_block *sb) +{ + if (nfs_ssc_client_tbl.ssc_nfs_ops) + (*nfs_ssc_client_tbl.ssc_nfs_ops->sco_sb_deactive)(sb); +} -- cgit v1.2.3 From 0afa15e1a5294754066343cad24af5ec8edae96d Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Thu, 10 Sep 2020 10:53:49 +0200 Subject: virtio: let arch advertise guest's memory access restrictions An architecture may restrict host access to guest memory, e.g. IBM s390 Secure Execution or AMD SEV. Provide a new Kconfig entry the architecture can select, CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS, when it provides the arch_has_restricted_virtio_memory_access callback to advertise to VIRTIO common code when the architecture restricts memory access from the host. The common code can then fail the probe for any device where VIRTIO_F_ACCESS_PLATFORM is required, but not set. Signed-off-by: Pierre Morel Reviewed-by: Cornelia Huck Reviewed-by: Halil Pasic Link: https://lore.kernel.org/r/1599728030-17085-2-git-send-email-pmorel@linux.ibm.com Signed-off-by: Michael S. Tsirkin Acked-by: Christian Borntraeger --- include/linux/virtio_config.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8fe857e27ef3..3f697c8c8205 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -540,4 +540,14 @@ static inline void virtio_cwrite64(struct virtio_device *vdev, virtio_cread_le((vdev), structname, member, ptr); \ _r; \ }) + +#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS +int arch_has_restricted_virtio_memory_access(void); +#else +static inline int arch_has_restricted_virtio_memory_access(void) +{ + return 0; +} +#endif /* CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS */ + #endif /* _LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From 9e9eb226b91225fc199bbafc06f3cd70bfce0100 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 14 Oct 2020 11:26:46 -0700 Subject: KVM: Cache as_id in kvm_memory_slot Cache the address space ID just like the slot ID. It will be used in order to fill in the dirty ring entries. Suggested-by: Paolo Bonzini Suggested-by: Sean Christopherson Reviewed-by: Sean Christopherson Signed-off-by: Peter Xu Message-Id: <20201014182700.2888246-7-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 05e3c2fb3ef7..c6f45687ba89 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -346,6 +346,7 @@ struct kvm_memory_slot { unsigned long userspace_addr; u32 flags; short id; + u16 as_id; }; static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) -- cgit v1.2.3 From ba452c9e996d8a4c347b32805f91abb70de5de7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 20 Oct 2020 23:25:56 +0200 Subject: bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the discussion in [0], update the bpf_redirect_neigh() helper to accept an optional parameter specifying the nexthop information. This makes it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without incurring a duplicate FIB lookup - since the FIB lookup helper will return the nexthop information even if no neighbour is present, this can simply be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen. [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@iogearbox.net/ Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Reviewed-by: David Ahern Link: https://lore.kernel.org/bpf/160322915615.32199.1187570224032024535.stgit@toke.dk --- include/linux/filter.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 20fc24c9779a..72d62cbc1578 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -607,12 +607,21 @@ struct bpf_skb_data_end { void *data_end; }; +struct bpf_nh_params { + u32 nh_family; + union { + u32 ipv4_nh; + struct in6_addr ipv6_nh; + }; +}; + struct bpf_redirect_info { u32 flags; u32 tgt_index; void *tgt_value; struct bpf_map *map; u32 kern_flags; + struct bpf_nh_params nh; }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); -- cgit v1.2.3 From ebfe3c5183733f784264450a41646a482f964e5e Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Wed, 21 Oct 2020 10:00:53 +0800 Subject: rtnetlink: fix data overflow in rtnl_calcit() "ip addr show" command execute error when we have a physical network card with a large number of VFs The return value of if_nlmsg_size() in rtnl_calcit() will exceed range of u16 data type when any network cards has a larger number of VFs. rtnl_vfinfo_size() will significant increase needed dump size when the value of num_vfs is larger. Eventually we get a wrong value of min_ifinfo_dump_size because of overflow which decides the memory size needed by netlink dump and netlink_dump() will return -EMSGSIZE because of not enough memory was allocated. So fix it by promoting min_dump_alloc data type to u32 to avoid whole netlink message size overflow and it's also align with the data type of struct netlink_callback{}.min_dump_alloc which is assigned by return value of rtnl_calcit() Signed-off-by: Di Zhu Link: https://lore.kernel.org/r/20201021020053.1401-1-zhudi21@huawei.com Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 666cd0390699..9f118771e248 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -240,7 +240,7 @@ struct netlink_dump_control { int (*done)(struct netlink_callback *); void *data; struct module *module; - u16 min_dump_alloc; + u32 min_dump_alloc; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, -- cgit v1.2.3 From 995a3ed67fc8c0e3301a770016fb66f1bbf15ec8 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:54 -0700 Subject: ext4: add fast_commit feature and handling for extended mount options We are running out of mount option bits. Add handling for using s_mount_opt2. Add ext4 and jbd2 fast commit feature flag and also add ability to turn off the fast commit feature in Ext4. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-3-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 04afa6dcd60d..0685cc95e501 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -289,6 +289,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 +#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020 /* See "journal feature predicate functions" below */ @@ -299,7 +300,8 @@ typedef struct journal_superblock_s JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ - JBD2_FEATURE_INCOMPAT_CSUM_V3) + JBD2_FEATURE_INCOMPAT_CSUM_V3 | \ + JBD2_FEATURE_INCOMPAT_FAST_COMMIT) #ifdef __KERNEL__ @@ -1263,6 +1265,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) +JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) /* * Journal flag definitions -- cgit v1.2.3 From 6866d7b3f2bb4f011041ba54c98b1584497fe2fd Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:55 -0700 Subject: ext4 / jbd2: add fast commit initialization This patch adds fast commit area trackers in the journal_t structure. These are initialized via the jbd2_fc_init() routine that this patch adds. This patch also adds ext4/fast_commit.c and ext4/fast_commit.h files for fast commit code that will be added in subsequent patches in this series. Reported-by: kernel test robot Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-4-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0685cc95e501..008629b4d615 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -918,6 +918,30 @@ struct journal_s */ unsigned long j_last; + /** + * @j_fc_first: + * + * The block number of the first fast commit block in the journal + * [j_state_lock]. + */ + unsigned long j_fc_first; + + /** + * @j_fc_off: + * + * Number of fast commit blocks currently allocated. + * [j_state_lock]. + */ + unsigned long j_fc_off; + + /** + * @j_fc_last: + * + * The block number one beyond the last fast commit block in the journal + * [j_state_lock]. + */ + unsigned long j_fc_last; + /** * @j_dev: Device where we store the journal. */ @@ -1068,6 +1092,12 @@ struct journal_s */ struct buffer_head **j_wbuf; + /** + * @j_fc_wbuf: Array of fast commit bhs for + * jbd2_journal_commit_transaction. + */ + struct buffer_head **j_fc_wbuf; + /** * @j_wbufsize: * @@ -1075,6 +1105,13 @@ struct journal_s */ int j_wbufsize; + /** + * @j_fc_wbufsize: + * + * Size of @j_fc_wbuf array. + */ + int j_fc_wbufsize; + /** * @j_last_sync_writer: * @@ -1535,6 +1572,8 @@ void __jbd2_log_wait_for_space(journal_t *journal); extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); +/* Fast commit related APIs */ +int jbd2_fc_init(journal_t *journal, int num_fc_blks); /* * is_journal_abort * -- cgit v1.2.3 From ff780b91efe901b8eecd8114785abae5341820ad Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:56 -0700 Subject: jbd2: add fast commit machinery This functions adds necessary APIs needed in JBD2 layer for fast commits. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-5-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 008629b4d615..a009d9b9c620 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -861,6 +861,13 @@ struct journal_s */ wait_queue_head_t j_wait_reserved; + /** + * @j_fc_wait: + * + * Wait queue to wait for completion of async fast commits. + */ + wait_queue_head_t j_fc_wait; + /** * @j_checkpoint_mutex: * @@ -1232,6 +1239,15 @@ struct journal_s */ struct lockdep_map j_trans_commit_map; #endif + + /** + * @j_fc_cleanup_callback: + * + * Clean-up after fast commit or full commit. JBD2 calls this function + * after every commit operation. + */ + void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + }; #define jbd2_might_wait_for_commit(j) \ @@ -1316,6 +1332,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */ +#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */ /* * Function declarations for the journaling transaction and buffer @@ -1574,6 +1592,15 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_init(journal_t *journal, int num_fc_blks); +int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit(journal_t *journal); +int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); +int jbd2_submit_inode_data(struct jbd2_inode *jinode); +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); +int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); +int jbd2_fc_release_bufs(journal_t *journal); + /* * is_journal_abort * -- cgit v1.2.3 From 5b849b5f96b47d82b5a432d8b91a8ad260e1de46 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:58 -0700 Subject: jbd2: fast commit recovery path This patch adds fast commit recovery support in JBD2. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-7-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a009d9b9c620..fb3d71ad6eea 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -751,6 +751,11 @@ jbd2_time_diff(unsigned long start, unsigned long end) #define JBD2_NR_BATCH 64 +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; + +#define JBD2_FC_REPLAY_STOP 0 +#define JBD2_FC_REPLAY_CONTINUE 1 + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -1248,6 +1253,21 @@ struct journal_s */ void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + /* + * @j_fc_replay_callback: + * + * File-system specific function that performs replay of a fast + * commit. JBD2 calls this function for each fast commit block found in + * the journal. This function should return JBD2_FC_REPLAY_CONTINUE + * to indicate that the block was processed correctly and more fast + * commit replay should continue. Return value of JBD2_FC_REPLAY_STOP + * indicates the end of replay (no more blocks remaining). A negative + * return value indicates error. + */ + int (*j_fc_replay_callback)(struct journal_s *journal, + struct buffer_head *bh, + enum passtype pass, int off, + tid_t expected_commit_id); }; #define jbd2_might_wait_for_commit(j) \ -- cgit v1.2.3 From ee6e00c868221f5f7d0b6eb4e8379a148e26bc20 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 22 Oct 2020 14:15:51 -0600 Subject: splice: change exported internal do_splice() helper to take kernel offset With the set_fs change, we can no longer rely on copy_{to,from}_user() accepting a kernel pointer, and it was bad form to do so anyway. Clean this up and change the internal helper that io_uring uses to deal with kernel pointers instead. This puts the offset copy in/out in __do_splice() instead, which just calls the same helper. Signed-off-by: Jens Axboe --- include/linux/splice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/splice.h b/include/linux/splice.h index 5c47013f708e..a55179fd60fc 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -78,8 +78,8 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *, struct pipe_buffer *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, splice_direct_actor *); -extern long do_splice(struct file *in, loff_t __user *off_in, - struct file *out, loff_t __user *off_out, +extern long do_splice(struct file *in, loff_t *off_in, + struct file *out, loff_t *off_out, size_t len, unsigned int flags); extern long do_tee(struct file *in, struct file *out, size_t len, -- cgit v1.2.3 From 879bc2d27904354b98ca295b6168718e045c4aa2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 19 Oct 2020 16:57:50 +0200 Subject: hil/parisc: Disable HIL driver when it gets stuck When starting a HP machine with HIL driver but without an HIL keyboard or HIL mouse attached, it may happen that data written to the HIL loop gets stuck (e.g. because the transaction queue is full). Usually one will then have to reboot the machine because all you see is and endless output of: Transaction add failed: transaction already queued? In the higher layers hp_sdc_enqueue_transaction() is called to queued up a HIL packet. This function returns an error code, and this patch adds the necessary checks for this return code and disables the HIL driver if further packets can't be sent. Tested on a HP 730 and a HP 715/64 machine. Signed-off-by: Helge Deller Cc: --- include/linux/hil_mlc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hil_mlc.h b/include/linux/hil_mlc.h index 774f7d3b8f6a..369221fd5518 100644 --- a/include/linux/hil_mlc.h +++ b/include/linux/hil_mlc.h @@ -103,7 +103,7 @@ struct hilse_node { /* Methods for back-end drivers, e.g. hp_sdc_mlc */ typedef int (hil_mlc_cts) (hil_mlc *mlc); -typedef void (hil_mlc_out) (hil_mlc *mlc); +typedef int (hil_mlc_out) (hil_mlc *mlc); typedef int (hil_mlc_in) (hil_mlc *mlc, suseconds_t timeout); struct hil_mlc_devinfo { -- cgit v1.2.3 From a6a0b05da9f37ff56faa6b8351ed6e0b55032460 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 14 Oct 2020 11:26:55 -0700 Subject: kvm: x86/mmu: Support dirty logging for the TDP MMU Dirty logging is a key feature of the KVM MMU and must be supported by the TDP MMU. Add support for both the write protection and PML dirty logging modes. Tested by running kvm-unit-tests and KVM selftests on an Intel Haswell machine. This series introduced no new failures. This series can be viewed in Gerrit at: https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2538 Signed-off-by: Ben Gardon Message-Id: <20201014182700.2888246-16-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c6f45687ba89..7f2e2a09ebbd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -798,6 +798,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 3f1b623a1be92103386bcab818e25885d6be9419 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 23 Oct 2020 17:00:41 +0800 Subject: vdpa: introduce config op to get valid iova range This patch introduce a config op to get valid iova range from the vDPA device. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20201023090043.14430-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index eae0bfd87d91..30bc7a7223bb 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -52,6 +52,16 @@ struct vdpa_device { int nvqs; }; +/** + * vDPA IOVA range - the IOVA range support by the device + * @first: start of the IOVA range + * @last: end of the IOVA range + */ +struct vdpa_iova_range { + u64 first; + u64 last; +}; + /** * vDPA_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the @@ -151,6 +161,10 @@ struct vdpa_device { * @get_generation: Get device config generation (optional) * @vdev: vdpa device * Returns u32: device generation + * @get_iova_range: Get supported iova range (optional) + * @vdev: vdpa device + * Returns the iova range supported by + * the device. * @set_map: Set device memory mapping (optional) * Needed for device that using device * specific DMA translation (on-chip IOMMU) @@ -216,6 +230,7 @@ struct vdpa_config_ops { void (*set_config)(struct vdpa_device *vdev, unsigned int offset, const void *buf, unsigned int len); u32 (*get_generation)(struct vdpa_device *vdev); + struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev); /* DMA ops */ int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); -- cgit v1.2.3 From c51f8f88d705e06bd696d7510aff22b33eb8e638 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Sun, 9 Aug 2020 06:57:44 +0000 Subject: random32: make prandom_u32() output unpredictable Non-cryptographic PRNGs may have great statistical properties, but are usually trivially predictable to someone who knows the algorithm, given a small sample of their output. An LFSR like prandom_u32() is particularly simple, even if the sample is widely scattered bits. It turns out the network stack uses prandom_u32() for some things like random port numbers which it would prefer are *not* trivially predictable. Predictability led to a practical DNS spoofing attack. Oops. This patch replaces the LFSR with a homebrew cryptographic PRNG based on the SipHash round function, which is in turn seeded with 128 bits of strong random key. (The authors of SipHash have *not* been consulted about this abuse of their algorithm.) Speed is prioritized over security; attacks are rare, while performance is always wanted. Replacing all callers of prandom_u32() is the quick fix. Whether to reinstate a weaker PRNG for uses which can tolerate it is an open question. Commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity") was an earlier attempt at a solution. This patch replaces it. Reported-by: Amit Klein Cc: Willy Tarreau Cc: Eric Dumazet Cc: "Jason A. Donenfeld" Cc: Andy Lutomirski Cc: Kees Cook Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: tytso@mit.edu Cc: Florian Westphal Cc: Marc Plumb Fixes: f227e3ec3b5c ("random32: update the net random state on interrupt and activity") Signed-off-by: George Spelvin Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/ [ willy: partial reversal of f227e3ec3b5c; moved SIPROUND definitions to prandom.h for later use; merged George's prandom_seed() proposal; inlined siprand_u32(); replaced the net_rand_state[] array with 4 members to fix a build issue; cosmetic cleanups to make checkpatch happy; fixed RANDOM32_SELFTEST build ] Signed-off-by: Willy Tarreau --- include/linux/prandom.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/prandom.h b/include/linux/prandom.h index aa16e6468f91..cc1e71334e53 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +#if BITS_PER_LONG == 64 +/* + * The core SipHash round function. Each line can be executed in + * parallel given enough CPU resources. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ + v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ + v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ + v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ +) + +#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) +#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) + +#elif BITS_PER_LONG == 32 +/* + * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. + * This is weaker, but 32-bit machines are not used for high-traffic + * applications, so there is less output for an attacker to analyze. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ + v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ + v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ + v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ +) +#define PRND_K0 0x6c796765 +#define PRND_K1 0x74656462 + +#else +#error Unsupported BITS_PER_LONG +#endif + struct rnd_state { __u32 s1, s2, s3, s4; }; -DECLARE_PER_CPU(struct rnd_state, net_rand_state); - u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); -- cgit v1.2.3 From 3744741adab6d9195551ce30e65e726c7a408421 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 10 Aug 2020 10:27:42 +0200 Subject: random32: add noise from network and scheduling activity With the removal of the interrupt perturbations in previous random32 change (random32: make prandom_u32() output unpredictable), the PRNG has become 100% deterministic again. While SipHash is expected to be way more robust against brute force than the previous Tausworthe LFSR, there's still the risk that whoever has even one temporary access to the PRNG's internal state is able to predict all subsequent draws till the next reseed (roughly every minute). This may happen through a side channel attack or any data leak. This patch restores the spirit of commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity") in that it will perturb the internal PRNG's statee using externally collected noise, except that it will not pick that noise from the random pool's bits nor upon interrupt, but will rather combine a few elements along the Tx path that are collectively hard to predict, such as dev, skb and txq pointers, packet length and jiffies values. These ones are combined using a single round of SipHash into a single long variable that is mixed with the net_rand_state upon each invocation. The operation was inlined because it produces very small and efficient code, typically 3 xor, 2 add and 2 rol. The performance was measured to be the same (even very slightly better) than before the switch to SipHash; on a 6-core 12-thread Core i7-8700k equipped with a 40G NIC (i40e), the connection rate dropped from 556k/s to 555k/s while the SYN cookie rate grew from 5.38 Mpps to 5.45 Mpps. Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/ Cc: George Spelvin Cc: Amit Klein Cc: Eric Dumazet Cc: "Jason A. Donenfeld" Cc: Andy Lutomirski Cc: Kees Cook Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: tytso@mit.edu Cc: Florian Westphal Cc: Marc Plumb Tested-by: Sedat Dilek Signed-off-by: Willy Tarreau --- include/linux/prandom.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/prandom.h b/include/linux/prandom.h index cc1e71334e53..bbf4b4ad61df 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -16,6 +16,12 @@ void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +DECLARE_PER_CPU(unsigned long, net_rand_noise); + +#define PRANDOM_ADD_NOISE(a, b, c, d) \ + prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \ + (unsigned long)(c), (unsigned long)(d)) + #if BITS_PER_LONG == 64 /* * The core SipHash round function. Each line can be executed in @@ -50,6 +56,18 @@ void prandom_reseed_late(void); #error Unsupported BITS_PER_LONG #endif +static inline void prandom_u32_add_noise(unsigned long a, unsigned long b, + unsigned long c, unsigned long d) +{ + /* + * This is not used cryptographically; it's just + * a convenient 4-word hash function. (3 xor, 2 add, 2 rol) + */ + a ^= raw_cpu_read(net_rand_noise); + PRND_SIPROUND(a, b, c, d); + raw_cpu_write(net_rand_noise, d); +} + struct rnd_state { __u32 s1, s2, s3, s4; }; @@ -99,6 +117,7 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) state->s2 = __seed(i, 8U); state->s3 = __seed(i, 16U); state->s4 = __seed(i, 128U); + PRANDOM_ADD_NOISE(state, i, 0, 0); } /* Pseudo random number generator from numerical recipes. */ -- cgit v1.2.3 From 23224e45004ed84c8466fd1e8e5860f541187029 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 23 Oct 2020 16:27:16 -0700 Subject: mm: remove kzfree() compatibility definition Commit 453431a54934 ("mm, treewide: rename kzfree() to kfree_sensitive()") renamed kzfree() to kfree_sensitive(), but it left a compatibility definition of kzfree() to avoid being too disruptive. Since then a few more instances of kzfree() have slipped in. Just get rid of them and remove the compatibility definition once and for all. Signed-off-by: Eric Biggers Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9e155cc83b8a..dd6897f62010 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -187,8 +187,6 @@ void kfree_sensitive(const void *); size_t __ksize(const void *); size_t ksize(const void *); -#define kzfree(x) kfree_sensitive(x) /* For backward compatibility */ - #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR void __check_heap_object(const void *ptr, unsigned long n, struct page *page, bool to_user); -- cgit v1.2.3 From 33def8498fdde180023444b08e12b72a9efed41d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Oct 2020 19:36:07 -0700 Subject: treewide: Convert macro and uses of __section(foo) to __section("foo") Use a more generic form for __section that requires quotes to avoid complications with clang and gcc differences. Remove the quote operator # from compiler_attributes.h __section macro. Convert all unquoted __section(foo) uses to quoted __section("foo"). Also convert __attribute__((section("foo"))) uses to __section("foo") even if the __attribute__ has multiple list entry forms. Conversion done using the script at: https://lore.kernel.org/lkml/75393e5ddc272dc7403de74d645e6c6e0f4e70eb.camel@perches.com/2-convert_section.pl Signed-off-by: Joe Perches Reviewed-by: Nick Desaulniers Reviewed-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 4 ++-- include/linux/cache.h | 2 +- include/linux/compiler.h | 8 ++++---- include/linux/compiler_attributes.h | 2 +- include/linux/cpu.h | 2 +- include/linux/dynamic_debug.h | 2 +- include/linux/export.h | 2 +- include/linux/firmware.h | 2 +- include/linux/init.h | 34 +++++++++++++++++----------------- include/linux/init_task.h | 4 ++-- include/linux/interrupt.h | 4 ++-- include/linux/kernel.h | 6 +++--- include/linux/linkage.h | 4 ++-- include/linux/lsm_hooks.h | 4 ++-- include/linux/module.h | 2 +- include/linux/moduleparam.h | 4 ++-- include/linux/mtd/xip.h | 2 +- include/linux/objtool.h | 2 +- include/linux/of.h | 2 +- include/linux/percpu-defs.h | 2 +- include/linux/printk.h | 4 ++-- include/linux/rcupdate.h | 2 +- include/linux/sched/debug.h | 2 +- include/linux/serial_core.h | 2 +- include/linux/spinlock.h | 2 +- include/linux/syscalls.h | 6 +++--- include/linux/trace_events.h | 2 +- include/linux/tracepoint.h | 8 ++++---- 28 files changed, 61 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 143c6ffce2db..39263c6b52e1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1153,7 +1153,7 @@ struct acpi_probe_entry { #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ - __used __section(__##table##_acpi_probe_table) = { \ + __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ @@ -1164,7 +1164,7 @@ struct acpi_probe_entry { #define ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(table, name, table_id, \ subtable, valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ - __used __section(__##table##_acpi_probe_table) = { \ + __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ diff --git a/include/linux/cache.h b/include/linux/cache.h index 1aa8009f6d06..d742c57eaee5 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -34,7 +34,7 @@ * but may get written to during init, so can't live in .rodata (via "const"). */ #ifndef __ro_after_init -#define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) +#define __ro_after_init __section(".data..ro_after_init") #endif #ifndef ____cacheline_aligned diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ac45f6d40d39..e512f5505dad 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, long ______r; \ static struct ftrace_likely_data \ __aligned(4) \ - __section(_ftrace_annotated_branch) \ + __section("_ftrace_annotated_branch") \ ______f = { \ .data.func = __func__, \ .data.file = __FILE__, \ @@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __trace_if_value(cond) ({ \ static struct ftrace_branch_data \ __aligned(4) \ - __section(_ftrace_branch) \ + __section("_ftrace_branch") \ __if_trace = { \ .func = __func__, \ .file = __FILE__, \ @@ -118,7 +118,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(.rodata..c_jump_table) +#define __annotate_jump_table __section(".rodata..c_jump_table") #else #define annotate_reachable() @@ -206,7 +206,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * visible to the compiler. */ #define __ADDRESSABLE(sym) \ - static void * __section(.discard.addressable) __used \ + static void * __section(".discard.addressable") __used \ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; /** diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index ea7b756b1c8f..b2a3f4f641a7 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -254,7 +254,7 @@ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-section-variable-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate */ -#define __section(S) __attribute__((__section__(#S))) +#define __section(section) __attribute__((__section__(section))) /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8aa84c052fdf..d6428aaf67e7 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -173,7 +173,7 @@ void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); /* Attach to any functions which should be considered cpuidle. */ -#define __cpuidle __attribute__((__section__(".cpuidle.text"))) +#define __cpuidle __section(".cpuidle.text") bool cpu_in_idle(unsigned long pc); diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 8aa0c7c2608c..a57ee75342cf 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -84,7 +84,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ static struct _ddebug __aligned(8) \ - __section(__dyndbg) name = { \ + __section("__dyndbg") name = { \ .modname = KBUILD_MODNAME, \ .function = __func__, \ .filename = __FILE__, \ diff --git a/include/linux/export.h b/include/linux/export.h index 8933ff6ad23a..fceb5e855717 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -130,7 +130,7 @@ struct kernel_symbol { * discarded in the final link stage. */ #define __ksym_marker(sym) \ - static int __ksym_marker_##sym[0] __section(.discard.ksym) __used + static int __ksym_marker_##sym[0] __section(".discard.ksym") __used #define __EXPORT_SYMBOL(sym, sec, ns) \ __ksym_marker(sym); \ diff --git a/include/linux/firmware.h b/include/linux/firmware.h index c15acadc6cf4..84e346ae766e 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -36,7 +36,7 @@ struct builtin_fw { #define DECLARE_BUILTIN_FIRMWARE_SIZE(name, blob, size) \ static const struct builtin_fw __fw_concat(__builtin_fw,__COUNTER__) \ - __used __section(.builtin_fw) = { name, blob, size } + __used __section(".builtin_fw") = { name, blob, size } #if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) int request_firmware(const struct firmware **fw, const char *name, diff --git a/include/linux/init.h b/include/linux/init.h index 212fc9e2f691..7b53cb3092ee 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -47,11 +47,11 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline -#define __initdata __section(.init.data) -#define __initconst __section(.init.rodata) -#define __exitdata __section(.exit.data) -#define __exit_call __used __section(.exitcall.exit) +#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline +#define __initdata __section(".init.data") +#define __initconst __section(".init.rodata") +#define __exitdata __section(".exit.data") +#define __exit_call __used __section(".exitcall.exit") /* * modpost check for section mismatches during the kernel build. @@ -70,9 +70,9 @@ * * The markers follow same syntax rules as __init / __initdata. */ -#define __ref __section(.ref.text) noinline -#define __refdata __section(.ref.data) -#define __refconst __section(.ref.rodata) +#define __ref __section(".ref.text") noinline +#define __refdata __section(".ref.data") +#define __refconst __section(".ref.rodata") #ifdef MODULE #define __exitused @@ -80,16 +80,16 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold notrace +#define __exit __section(".exit.text") __exitused __cold notrace /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold notrace \ +#define __meminit __section(".meminit.text") __cold notrace \ __latent_entropy -#define __meminitdata __section(.meminit.data) -#define __meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold notrace -#define __memexitdata __section(.memexit.data) -#define __memexitconst __section(.memexit.rodata) +#define __meminitdata __section(".meminit.data") +#define __meminitconst __section(".meminit.rodata") +#define __memexit __section(".memexit.text") __exitused __cold notrace +#define __memexitdata __section(".memexit.data") +#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" @@ -254,7 +254,7 @@ struct obs_kernel_param { static const char __setup_str_##unique_id[] __initconst \ __aligned(1) = str; \ static struct obs_kernel_param __setup_##unique_id \ - __used __section(.init.setup) \ + __used __section(".init.setup") \ __attribute__((aligned((sizeof(long))))) \ = { __setup_str_##unique_id, fn, early } @@ -298,7 +298,7 @@ void __init parse_early_options(char *cmdline); #endif /* Data marked not to be saved by software suspend */ -#define __nosavedata __section(.data..nosave) +#define __nosavedata __section(".data..nosave") #ifdef MODULE #define __exit_p(x) x diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2c620d7ac432..b2412b4d4c20 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -40,12 +40,12 @@ extern struct cred init_cred; /* Attach to the init_task data structure for proper alignment */ #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -#define __init_task_data __attribute__((__section__(".data..init_task"))) +#define __init_task_data __section(".data..init_task") #else #define __init_task_data /**/ #endif /* Attach to the thread_info data structure for proper alignment */ -#define __init_thread_info __attribute__((__section__(".data..init_thread_info"))) +#define __init_thread_info __section(".data..init_thread_info") #endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f9aee3538461..ee8299eb1f52 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -792,9 +792,9 @@ extern int arch_early_irq_init(void); * We want to know which function is an entrypoint of a hardirq or a softirq. */ #ifndef __irq_entry -# define __irq_entry __attribute__((__section__(".irqentry.text"))) +# define __irq_entry __section(".irqentry.text") #endif -#define __softirq_entry __attribute__((__section__(".softirqentry.text"))) +#define __softirq_entry __section(".softirqentry.text") #endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index c629215fdad9..2f05e9128201 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -729,7 +729,7 @@ do { \ #define do_trace_printk(fmt, args...) \ do { \ static const char *trace_printk_fmt __used \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __trace_printk_check_format(fmt, ##args); \ @@ -773,7 +773,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); #define trace_puts(str) ({ \ static const char *trace_printk_fmt __used \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(str) ? str : NULL; \ \ if (__builtin_constant_p(str)) \ @@ -795,7 +795,7 @@ extern void trace_dump_stack(int skip); do { \ if (__builtin_constant_p(fmt)) { \ static const char *trace_printk_fmt __used \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index d796ec20d114..5bcfbd972e97 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -36,8 +36,8 @@ __stringify(name)) #endif -#define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) -#define __page_aligned_bss __section(.bss..page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_data __section(".data..page_aligned") __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(".bss..page_aligned") __aligned(PAGE_SIZE) /* * For assembly routines. diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 8814e3d5952d..c503f7ab8afb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1611,12 +1611,12 @@ extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; #define DEFINE_LSM(lsm) \ static struct lsm_info __lsm_##lsm \ - __used __section(.lsm_info.init) \ + __used __section(".lsm_info.init") \ __aligned(sizeof(unsigned long)) #define DEFINE_EARLY_LSM(lsm) \ static struct lsm_info __early_lsm_##lsm \ - __used __section(.early_lsm_info.init) \ + __used __section(".early_lsm_info.init") \ __aligned(sizeof(unsigned long)) #ifdef CONFIG_SECURITY_SELINUX_DISABLE diff --git a/include/linux/module.h b/include/linux/module.h index a29187f7c360..7ccdf87f376f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -278,7 +278,7 @@ extern typeof(name) __mod_##type##__##name##_device_table \ .version = _version, \ }; \ static const struct module_version_attribute \ - __used __attribute__ ((__section__ ("__modver"))) \ + __used __section("__modver") \ * __moduleparam_const __modver_attr = &___modver_attr #endif diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 47879fc7f75e..6388eb9734a5 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -22,7 +22,7 @@ #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ - __used __attribute__((section(".modinfo"), unused, aligned(1))) \ + __used __section(".modinfo") __attribute__((unused, aligned(1))) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ @@ -289,7 +289,7 @@ struct kparam_array static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ - __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ + __section("__param") __attribute__ ((unused, aligned(sizeof(void *)))) \ = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } diff --git a/include/linux/mtd/xip.h b/include/linux/mtd/xip.h index a4e352b1dfe6..3cac9360588f 100644 --- a/include/linux/mtd/xip.h +++ b/include/linux/mtd/xip.h @@ -28,7 +28,7 @@ * those functions so they get relocated to ram. */ #ifdef CONFIG_XIP_KERNEL -#define __xipram noinline __attribute__ ((__section__ (".xiptext"))) +#define __xipram noinline __section(".xiptext") #endif /* diff --git a/include/linux/objtool.h b/include/linux/objtool.h index ab82c793c897..577f51436cf9 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -60,7 +60,7 @@ struct unwind_hint { * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(.discard.func_stack_frame_non_standard) \ + static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func #else /* __ASSEMBLY__ */ diff --git a/include/linux/of.h b/include/linux/of.h index 481ec0467285..5d51891cbf1a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1299,7 +1299,7 @@ static inline int of_get_available_child_count(const struct device_node *np) #if defined(CONFIG_OF) && !defined(MODULE) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ - __used __section(__##table##_of_table) \ + __used __section("__" #table "_of_table") \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #else diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 176bfbd52d97..dff7040f629a 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -51,7 +51,7 @@ PER_CPU_ATTRIBUTES #define __PCPU_DUMMY_ATTRS \ - __attribute__((section(".discard"), unused)) + __section(".discard") __attribute__((unused)) /* * s390 and alpha modules require percpu variables to be defined as diff --git a/include/linux/printk.h b/include/linux/printk.h index 78479633ccfc..fe7eb2351610 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -437,7 +437,7 @@ extern int kptr_restrict; #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ ({ \ - static bool __section(.data.once) __print_once; \ + static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ @@ -448,7 +448,7 @@ extern int kptr_restrict; }) #define printk_deferred_once(fmt, ...) \ ({ \ - static bool __section(.data.once) __print_once; \ + static bool __section(".data.once") __print_once; \ bool __ret_print_once = !__print_once; \ \ if (!__print_once) { \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7c1ceff02852..6cdd0152c253 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -299,7 +299,7 @@ static inline int rcu_read_lock_any_held(void) */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(".data.unlikely") __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && (c)) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 00c45a0e6abe..ae51f4529fc9 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -43,7 +43,7 @@ extern void proc_sched_set_task(struct task_struct *p); #endif /* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) +#define __sched __section(".sched.text") /* Linker adds these: start and end of __sched functions */ extern char __sched_text_start[], __sched_text_end[]; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8a99279a579b..ff63c2963359 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -373,7 +373,7 @@ extern const struct earlycon_id *__earlycon_table_end[]; .compatible = compat, \ .setup = fn }; \ static const struct earlycon_id EARLYCON_USED_OR_UNUSED \ - __section(__earlycon_table) \ + __section("__earlycon_table") \ * const __PASTE(__p, unique_id) = &unique_id #define OF_EARLYCON_DECLARE(_name, compat, fn) \ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f2f12d746dbd..79897841a2cc 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -76,7 +76,7 @@ #define LOCK_SECTION_END \ ".previous\n\t" -#define __lockfunc __attribute__((section(".spinlock.text"))) +#define __lockfunc __section(".spinlock.text") /* * Pull the arch_spinlock_t and arch_rwlock_t definitions: diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2eda7678fe1d..37bea07c12f2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -144,7 +144,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct trace_event_call __used \ - __attribute__((section("_ftrace_events"))) \ + __section("_ftrace_events") \ *__event_enter_##sname = &event_enter_##sname; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ @@ -160,7 +160,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct trace_event_call __used \ - __attribute__((section("_ftrace_events"))) \ + __section("_ftrace_events") \ *__event_exit_##sname = &event_exit_##sname; #define SYSCALL_METADATA(sname, nb, ...) \ @@ -184,7 +184,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ }; \ static struct syscall_metadata __used \ - __attribute__((section("__syscalls_metadata"))) \ + __section("__syscalls_metadata") \ *__p_syscall_meta_##sname = &__syscall_meta_##sname; static inline int is_syscall_trace_event(struct trace_event_call *tp_event) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5c6943354049..d321fe5ad1a1 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -709,7 +709,7 @@ do { \ tracing_record_cmdline(current); \ if (__builtin_constant_p(fmt)) { \ static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ + __section("__trace_printk_fmt") = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ __trace_bprintk(ip, trace_printk_fmt, ##args); \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 81fa0b2f271e..0f21617f1a66 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -119,7 +119,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __TRACEPOINT_ENTRY(name) \ static tracepoint_ptr_t __tracepoint_ptr_##name __used \ - __section(__tracepoints_ptrs) = &__tracepoint_##name + __section("__tracepoints_ptrs") = &__tracepoint_##name #endif #endif /* _LINUX_TRACEPOINT_H */ @@ -286,11 +286,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) */ #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ static const char __tpstrtab_##_name[] \ - __section(__tracepoints_strings) = #_name; \ + __section("__tracepoints_strings") = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ int __traceiter_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used \ - __section(__tracepoints) = { \ + __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_INIT_FALSE, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ @@ -396,7 +396,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static const char *___tp_str __tracepoint_string = str; \ ___tp_str; \ }) -#define __tracepoint_string __used __section(__tracepoint_str) +#define __tracepoint_string __used __section("__tracepoint_str") #else /* * tracepoint_string() is used to save the string address for userspace -- cgit v1.2.3 From 44cdc1d952e3f7aa9944c1bbf38fc23f49885017 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Sep 2020 11:18:30 -0400 Subject: convert ->f_ep_links/->fllink to hlist we don't care about the order of elements there Signed-off-by: Al Viro --- include/linux/eventpoll.h | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 8f000fada5a4..4e215ccfa792 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -25,7 +25,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) { - INIT_LIST_HEAD(&file->f_ep_links); + INIT_HLIST_HEAD(&file->f_ep_links); INIT_LIST_HEAD(&file->f_tfile_llink); } @@ -50,7 +50,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(list_empty(&file->f_ep_links))) + if (likely(hlist_empty(&file->f_ep_links))) return; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..b484ba0e474a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -946,7 +946,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct list_head f_ep_links; + struct hlist_head f_ep_links; struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -- cgit v1.2.3 From 319c15174757aaedacc89a6e55c965416f130e64 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 Oct 2020 20:45:51 -0400 Subject: epoll: take epitem list out of struct file Move the head of epitem list out of struct file; for epoll ones it's moved into struct eventpoll (->refs there), for non-epoll - into the new object (struct epitem_head). In place of ->f_ep_links we leave a pointer to the list head (->f_ep). ->f_ep is protected by ->f_lock and it's zeroed as soon as the list of epitems becomes empty (that can happen only in ep_remove() by now). The list of files for reverse path check is *not* going through struct file now - it's a single-linked list going through epitem_head instances. It's terminated by ERR_PTR(-1) (== EP_UNACTIVE_POINTER), so the elements of list can be distinguished by head->next != NULL. epitem_head instances are allocated at ep_insert() time (by attach_epitem()) and freed either by ep_remove() (if it empties the set of epitems *and* epitem_head does not belong to the reverse path check list) or by clear_tfile_check_list() when the list is emptied (if the set of epitems is empty by that point). Allocations are done from a separate slab - minimal kmalloc() size is too large on some architectures. As the result, we trim struct file _and_ get rid of the games with temporary file references. Locking and barriers are interesting (aren't they always); see unlist_file() and ep_remove() for details. The non-obvious part is that ep_remove() needs to decide if it will be the one to free the damn thing *before* actually storing NULL to head->epitems.first - that's what smp_load_acquire is for in there. unlist_file() lockless path is safe, since we hit it only if we observe NULL in head->epitems.first and whoever had done that store is guaranteed to have observed non-NULL in head->next. IOW, their last access had been the store of NULL into ->epitems.first and we can safely free the sucker. OTOH, we are under rcu_read_lock() and both epitem and epitem->file have their freeing RCU-delayed. So if we see non-NULL ->epitems.first, we can grab ->f_lock (all epitems in there share the same struct file) and safely recheck the emptiness of ->epitems; again, ->next is still non-NULL, so ep_remove() couldn't have freed head yet. ->f_lock serializes us wrt ep_remove(); the rest is trivial. Note that once head->epitems becomes NULL, nothing can get inserted into it - the only remaining reference to head after that point is from the reverse path check list. Signed-off-by: Al Viro --- include/linux/eventpoll.h | 11 +---------- include/linux/fs.h | 5 ++--- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 4e215ccfa792..0350393465d4 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -22,14 +22,6 @@ struct file; struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); #endif -/* Used to initialize the epoll bits inside the "struct file" */ -static inline void eventpoll_init_file(struct file *file) -{ - INIT_HLIST_HEAD(&file->f_ep_links); - INIT_LIST_HEAD(&file->f_tfile_llink); -} - - /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); @@ -50,7 +42,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(hlist_empty(&file->f_ep_links))) + if (likely(!file->f_ep)) return; /* @@ -72,7 +64,6 @@ static inline int ep_op_has_event(int op) #else -static inline void eventpoll_init_file(struct file *file) {} static inline void eventpoll_release(struct file *file) {} #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index b484ba0e474a..993c540e3b77 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -923,7 +923,7 @@ struct file { const struct file_operations *f_op; /* - * Protects f_ep_links, f_flags. + * Protects f_ep, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; @@ -946,8 +946,7 @@ struct file { #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct hlist_head f_ep_links; - struct list_head f_tfile_llink; + struct hlist_head *f_ep; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; -- cgit v1.2.3 From 96ffcdf239de6f9970178bb7d643e16fd9e68ab9 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 20 Oct 2020 15:12:12 +0900 Subject: PM / devfreq: Remove redundant governor_name from struct devfreq The devfreq structure instance contains the governor_name and a governor instance. When need to show the governor name, better to use the name of devfreq_governor structure. So, governor_name variable in struct devfreq is a redundant and unneeded variable. Remove the redundant governor_name of struct devfreq and then use the name of devfreq_governor instance. Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 121a2430d7f7..b6d3bae1c74d 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -15,8 +15,6 @@ #include #include -#define DEVFREQ_NAME_LEN 16 - /* DEVFREQ governor name */ #define DEVFREQ_GOV_SIMPLE_ONDEMAND "simple_ondemand" #define DEVFREQ_GOV_PERFORMANCE "performance" @@ -139,7 +137,6 @@ struct devfreq_stats { * using devfreq. * @profile: device-specific devfreq profile * @governor: method how to choose frequency based on the usage. - * @governor_name: devfreq governor name for use with this devfreq * @nb: notifier block used to notify devfreq object that it should * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. @@ -176,7 +173,6 @@ struct devfreq { struct device dev; struct devfreq_dev_profile *profile; const struct devfreq_governor *governor; - char governor_name[DEVFREQ_NAME_LEN]; struct notifier_block nb; struct delayed_work work; -- cgit v1.2.3 From e275d2109cdaea8b4554b9eb8a828bdb8f8ba068 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 26 Oct 2020 10:08:47 +0200 Subject: bus: ti-sysc: Fix reset status check for modules with quirks Commit d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") started showing a "OCP softreset timed out" warning on enable if the interconnect target module is not out of reset. This caused the warning to be often triggered for i2c and hdq while the devices are working properly. Turns out that some interconnect target modules seem to have an unusable reset status bits unless the module specific reset quirks are activated. Let's just skip the reset status check for those modules as we only want to activate the reset quirks when doing a reset, and not on enable. This way we don't see the bogus "OCP softreset timed out" warnings during boot. Fixes: d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index c59999ce044e..240dce553a0b 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) #define SYSC_MODULE_QUIRK_PRUSS BIT(24) #define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) #define SYSC_MODULE_QUIRK_RTC_UNLOCK BIT(22) -- cgit v1.2.3 From cb47755725da7b90fecbb2aa82ac3b24a7adb89b Mon Sep 17 00:00:00 2001 From: Zeng Tao Date: Tue, 1 Sep 2020 17:30:13 +0800 Subject: time: Prevent undefined behaviour in timespec64_to_ns() UBSAN reports: Undefined behaviour in ./include/linux/time64.h:127:27 signed integer overflow: 17179869187 * 1000000000 cannot be represented in type 'long long int' Call Trace: timespec64_to_ns include/linux/time64.h:127 [inline] set_cpu_itimer+0x65c/0x880 kernel/time/itimer.c:180 do_setitimer+0x8e/0x740 kernel/time/itimer.c:245 __x64_sys_setitimer+0x14c/0x2c0 kernel/time/itimer.c:336 do_syscall_64+0xa1/0x540 arch/x86/entry/common.c:295 Commit bd40a175769d ("y2038: itimer: change implementation to timespec64") replaced the original conversion which handled time clamping correctly with timespec64_to_ns() which has no overflow protection. Fix it in timespec64_to_ns() as this is not necessarily limited to the usage in itimers. [ tglx: Added comment and adjusted the fixes tag ] Fixes: 361a3bf00582 ("time64: Add time64.h header and define struct timespec64") Signed-off-by: Zeng Tao Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1598952616-6416-1-git-send-email-prime.zeng@hisilicon.com --- include/linux/time64.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index c9dcb3e5781f..5117cb5b5656 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -124,6 +124,10 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts) */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { + /* Prevent multiplication overflow */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return KTIME_MAX; + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } -- cgit v1.2.3 From bc3d7bf61a9eaecccc84dc2ecc2a9a3fa4f5ec47 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Sat, 3 Oct 2020 23:25:31 -0400 Subject: elf: Expose ELF header in compat_start_thread() Like it is done for SET_PERSONALITY with x86, which requires the ELF header to select correct personality parameters, x86 requires the headers on compat_start_thread() to choose starting CS for ELF32 binaries, instead of relying on the going-away TIF_IA32/X32 flags. Add an indirection macro to ELF invocations of START_THREAD, that x86 can reimplement to receive the extra parameter just for ELF files. This requires no changes to other architectures who don't need the header information, they can continue to use the original start_thread for ELF and non-ELF binaries, and it prevents affecting non-ELF code paths for x86. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201004032536.1229030-6-krisman@collabora.com --- include/linux/elf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 5d5b0321da0b..6dbcfe7a3fd7 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -22,6 +22,11 @@ SET_PERSONALITY(ex) #endif +#ifndef START_THREAD +#define START_THREAD(elf_ex, regs, elf_entry, start_stack) \ + start_thread(regs, elf_entry, start_stack) +#endif + #define ELF32_GNU_PROPERTY_ALIGN 4 #define ELF64_GNU_PROPERTY_ALIGN 8 -- cgit v1.2.3 From 9a29a671902c2be05d636045a4dd365219ca716c Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Sat, 3 Oct 2020 23:25:33 -0400 Subject: elf: Expose ELF header on arch_setup_additional_pages() Like it is done for SET_PERSONALITY with ARM, which requires the ELF header to select correct personality parameters, x86 requires the headers when selecting which VDSO to load, instead of relying on the going-away TIF_IA32/X32 flags. Add an indirection macro to arch_setup_additional_pages(), that x86 can reimplement to receive the extra parameter just for ELF files. This requires no changes to other architectures, who can continue to use the original arch_setup_additional_pages for ELF and non-ELF binaries. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201004032536.1229030-8-krisman@collabora.com --- include/linux/elf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 6dbcfe7a3fd7..c9a46c4e183b 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -27,6 +27,11 @@ start_thread(regs, elf_entry, start_stack) #endif +#if defined(ARCH_HAS_SETUP_ADDITIONAL_PAGES) && !defined(ARCH_SETUP_ADDITIONAL_PAGES) +#define ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \ + arch_setup_additional_pages(bprm, interpreter) +#endif + #define ELF32_GNU_PROPERTY_ALIGN 4 #define ELF64_GNU_PROPERTY_ALIGN 8 -- cgit v1.2.3 From ab589bac553f79d559952aa088480a72258ac5bc Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 19 Oct 2020 13:53:13 +0300 Subject: ASoC: adau1977: remove platform data and move micbias bindings include The change removes the platform_data include/definition. It only contains some values for the MICBIAS. These are moved into 'dt-bindings/sound/adi,adau1977.h' so that they can be used inside device-trees. When moving then, they need to be converted to pre-compiler defines, so that the DT compiler can understand them. The driver then, also needs to include the new 'dt-bindings/sound/adi,adau1977.h' file. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201019105313.24862-1-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/platform_data/adau1977.h | 44 ---------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 include/linux/platform_data/adau1977.h (limited to 'include/linux') diff --git a/include/linux/platform_data/adau1977.h b/include/linux/platform_data/adau1977.h deleted file mode 100644 index 86667235077a..000000000000 --- a/include/linux/platform_data/adau1977.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ADAU1977/ADAU1978/ADAU1979 driver - * - * Copyright 2014 Analog Devices Inc. - * Author: Lars-Peter Clausen - */ - -#ifndef __LINUX_PLATFORM_DATA_ADAU1977_H__ -#define __LINUX_PLATFORM_DATA_ADAU1977_H__ - -/** - * enum adau1977_micbias - ADAU1977 MICBIAS pin voltage setting - * @ADAU1977_MICBIAS_5V0: MICBIAS is set to 5.0 V - * @ADAU1977_MICBIAS_5V5: MICBIAS is set to 5.5 V - * @ADAU1977_MICBIAS_6V0: MICBIAS is set to 6.0 V - * @ADAU1977_MICBIAS_6V5: MICBIAS is set to 6.5 V - * @ADAU1977_MICBIAS_7V0: MICBIAS is set to 7.0 V - * @ADAU1977_MICBIAS_7V5: MICBIAS is set to 7.5 V - * @ADAU1977_MICBIAS_8V0: MICBIAS is set to 8.0 V - * @ADAU1977_MICBIAS_8V5: MICBIAS is set to 8.5 V - * @ADAU1977_MICBIAS_9V0: MICBIAS is set to 9.0 V - */ -enum adau1977_micbias { - ADAU1977_MICBIAS_5V0 = 0x0, - ADAU1977_MICBIAS_5V5 = 0x1, - ADAU1977_MICBIAS_6V0 = 0x2, - ADAU1977_MICBIAS_6V5 = 0x3, - ADAU1977_MICBIAS_7V0 = 0x4, - ADAU1977_MICBIAS_7V5 = 0x5, - ADAU1977_MICBIAS_8V0 = 0x6, - ADAU1977_MICBIAS_8V5 = 0x7, - ADAU1977_MICBIAS_9V0 = 0x8, -}; - -/** - * struct adau1977_platform_data - Platform configuration data for the ADAU1977 - * @micbias: Specifies the voltage for the MICBIAS pin - */ -struct adau1977_platform_data { - enum adau1977_micbias micbias; -}; - -#endif -- cgit v1.2.3 From 6e1e90ec027509a7e8d4efbd77a65b32b5a8b3ec Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Wed, 14 Oct 2020 23:30:24 +0300 Subject: regmap: mmio: add config option to allow relaxed MMIO accesses On some platforms (eg armv7 due to the CONFIG_ARM_DMA_MEM_BUFFERABLE) MMIO R/W operations always add memory barriers which can increase load, decrease battery life or in general reduce performance unnecessarily on devices which access a lot of configuration registers and where ordering does not matter (eg. media accelerators like the Verisilicon / Hantro video decoders). Drivers used to call the relaxed MMIO variants directly but since they are now accessing the MMIO registers via regmaps (to compensate for different VPU HW reg layouts via regmap fields), there is a need for a relaxed API / config to preserve existing behaviour. Cc: Mark Brown Signed-off-by: Adrian Ratiu Link: https://lore.kernel.org/r/20201014203024.954369-1-adrian.ratiu@collabora.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e7834d98207f..126fe700d1d8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -315,6 +315,10 @@ typedef void (*regmap_unlock)(void *); * masks are used. * @zero_flag_mask: If set, read_flag_mask and write_flag_mask are used even * if they are both empty. + * @use_relaxed_mmio: If set, MMIO R/W operations will not use memory barriers. + * This can avoid load on devices which don't require strict + * orderings, but drivers should carefully add any explicit + * memory barriers when they may require them. * @use_single_read: If set, converts the bulk read operation into a series of * single read operations. This is useful for a device that * does not support bulk read. @@ -388,6 +392,7 @@ struct regmap_config { bool use_single_read; bool use_single_write; + bool use_relaxed_mmio; bool can_multi_write; enum regmap_endian reg_format_endian; -- cgit v1.2.3 From fbdd0049d98d44914fc57d4b91f867f4996c787b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 26 Oct 2020 15:43:59 +0200 Subject: RDMA/mlx5: Fix devlink deadlock on net namespace deletion When a mlx5 core devlink instance is reloaded in different net namespace, its associated IB device is deleted and recreated. Example sequence is: $ ip netns add foo $ devlink dev reload pci/0000:00:08.0 netns foo $ ip netns del foo mlx5 IB device needs to attach and detach the netdevice to it through the netdev notifier chain during load and unload sequence. A below call graph of the unload flow. cleanup_net() down_read(&pernet_ops_rwsem); <- first sem acquired ops_pre_exit_list() pre_exit() devlink_pernet_pre_exit() devlink_reload() mlx5_devlink_reload_down() mlx5_unload_one() [...] mlx5_ib_remove() mlx5_ib_unbind_slave_port() mlx5_remove_netdev_notifier() unregister_netdevice_notifier() down_write(&pernet_ops_rwsem);<- recurrsive lock Hence, when net namespace is deleted, mlx5 reload results in deadlock. When deadlock occurs, devlink mutex is also held. This not only deadlocks the mlx5 device under reload, but all the processes which attempt to access unrelated devlink devices are deadlocked. Hence, fix this by mlx5 ib driver to register for per net netdev notifier instead of global one, which operats on the net namespace without holding the pernet_ops_rwsem. Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload") Link: https://lore.kernel.org/r/20201026134359.23150-1-parav@nvidia.com Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index add85094f9a5..0f23e1ed5e71 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1213,4 +1213,22 @@ static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) return val.vbool; } +/** + * mlx5_core_net - Provide net namespace of the mlx5_core_dev + * @dev: mlx5 core device + * + * mlx5_core_net() returns the net namespace of mlx5 core device. + * This can be called only in below described limited context. + * (a) When a devlink instance for mlx5_core is registered and + * when devlink reload operation is disabled. + * or + * (b) during devlink reload reload_down() and reload_up callbacks + * where it is ensured that devlink instance's net namespace is + * stable. + */ +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From ab7cffb8d2367e5b088c7c14452724e719a10eba Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 26 Oct 2020 20:20:40 +0100 Subject: MIPS: ingenic: remove unused platform_data header file There are no users of this headers file in the kernel. All users are likely migrated to device tree which is a good thing. Signed-off-by: Sam Ravnborg Cc: Thomas Bogendoerfer Cc: Paul Cercueil Cc: Harvey Hunt Cc: linux-mips@vger.kernel.org Reviewed-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- include/linux/platform_data/jz4740/jz4740_nand.h | 25 ------------------------ 1 file changed, 25 deletions(-) delete mode 100644 include/linux/platform_data/jz4740/jz4740_nand.h (limited to 'include/linux') diff --git a/include/linux/platform_data/jz4740/jz4740_nand.h b/include/linux/platform_data/jz4740/jz4740_nand.h deleted file mode 100644 index b3f066d63059..000000000000 --- a/include/linux/platform_data/jz4740/jz4740_nand.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009-2010, Lars-Peter Clausen - * JZ4740 SoC NAND controller driver - */ - -#ifndef __JZ4740_NAND_H__ -#define __JZ4740_NAND_H__ - -#include -#include - -#define JZ_NAND_NUM_BANKS 4 - -struct jz_nand_platform_data { - int num_partitions; - struct mtd_partition *partitions; - - unsigned char banks[JZ_NAND_NUM_BANKS]; - - void (*ident_callback)(struct platform_device *, struct mtd_info *, - struct mtd_partition **, int *num_partitions); -}; - -#endif -- cgit v1.2.3 From 343a3e8bc635bd4c58d45a4fe67f9c3a78fbd191 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:20:50 +0100 Subject: bpf: Fix -Wshadow warnings There are thousands of warnings about one macro in a W=2 build: include/linux/filter.h:561:6: warning: declaration of 'ret' shadows a previous local [-Wshadow] Prefix all the locals in that macro with __ to avoid most of these warnings. Fixes: 492ecee892c2 ("bpf: enable program stats") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201026162110.3710415-1-arnd@kernel.org --- include/linux/filter.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 72d62cbc1578..1b62397bd124 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -558,21 +558,21 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ - u32 ret; \ + u32 __ret; \ cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ - struct bpf_prog_stats *stats; \ - u64 start = sched_clock(); \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ - stats = this_cpu_ptr(prog->aux->stats); \ - u64_stats_update_begin(&stats->syncp); \ - stats->cnt++; \ - stats->nsecs += sched_clock() - start; \ - u64_stats_update_end(&stats->syncp); \ + struct bpf_prog_stats *__stats; \ + u64 __start = sched_clock(); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&__stats->syncp); \ + __stats->cnt++; \ + __stats->nsecs += sched_clock() - __start; \ + u64_stats_update_end(&__stats->syncp); \ } else { \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ } \ - ret; }) + __ret; }) #define BPF_PROG_RUN(prog, ctx) \ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) -- cgit v1.2.3 From 1c534352f47fd83eb08075ac2474f707e74bf7f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:19 +0200 Subject: cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS driver flag Generally, a cpufreq driver may need to update some internal upper and lower frequency boundaries on policy max and min changes, respectively, but currently this does not work if the target frequency does not change along with the policy limit. Namely, if the target frequency does not change along with the policy min or max, the "target_freq == policy->cur" check in __cpufreq_driver_target() prevents driver callbacks from being invoked and they do not even have a chance to update the corresponding internal boundary. This particularly affects the "powersave" and "performance" governors that always set the target frequency to one of the policy limits and it never changes when the other limit is updated. To allow cpufreq the drivers needing to update internal frequency boundaries on policy limits changes to avoid this issue, introduce a new driver flag, CPUFREQ_NEED_UPDATE_LIMITS, that (when set) will neutralize the check mentioned above. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index fa37b1c66443..038ed83aab41 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -298,7 +298,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { char name[CPUFREQ_NAME_LEN]; - u8 flags; + u16 flags; void *driver_data; /* needed by all drivers */ @@ -422,6 +422,14 @@ struct cpufreq_driver { */ #define CPUFREQ_IS_COOLING_DEV BIT(7) +/* + * Set by drivers that need to update internale upper and lower boundaries along + * with the target frequency and so the core and governors should also invoke + * the diver if the target frequency does not change, but the policy min or max + * may have changed. + */ +#define CPUFREQ_NEED_UPDATE_LIMITS BIT(8) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit v1.2.3 From 6d915476e67d99b73a57bceb83cff1cf153d8bf6 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 22 Sep 2020 08:44:50 -0400 Subject: audit: trigger accompanying records when no rules present When there are no audit rules registered, mandatory records (config, etc.) are missing their accompanying records (syscall, proctitle, etc.). This is due to audit context dummy set on syscall entry based on absence of rules that signals that no other records are to be printed. Clear the dummy bit if any record is generated, open coding this in audit_log_start(). The proctitle context and dummy checks are pointless since the proctitle record will not be printed if no syscall records are printed. The fds array is reset to -1 after the first syscall to indicate it isn't valid any more, but was never set to -1 when the context was allocated to indicate it wasn't yet valid. Check ctx->pwd in audit_log_name(). The audit_inode* functions can be called without going through getname_flags() or getname_kernel() that sets audit_names and cwd, so set the cwd in audit_alloc_name() if it has not already been done so due to audit_names being valid and purge all other audit_getcwd() calls. Revert the LSM dump_common_audit_data() LSM_AUDIT_DATA_* cases from the ghak96 patch since they are no longer necessary due to cwd coverage in audit_alloc_name(). Thanks to bauen1 for reporting LSM situations in which context->cwd is not valid, inadvertantly fixed by the ghak96 patch. Please see upstream github issue https://github.com/linux-audit/audit-kernel/issues/120 This is also related to upstream github issue https://github.com/linux-audit/audit-kernel/issues/96 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b3d859831a31..82b7c1116a85 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -292,7 +292,6 @@ extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); -extern void __audit_getcwd(void); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); @@ -351,11 +350,6 @@ static inline void audit_getname(struct filename *name) if (unlikely(!audit_dummy_context())) __audit_getname(name); } -static inline void audit_getcwd(void) -{ - if (unlikely(audit_context())) - __audit_getcwd(); -} static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { @@ -584,8 +578,6 @@ static inline struct filename *audit_reusename(const __user char *name) } static inline void audit_getname(struct filename *name) { } -static inline void audit_getcwd(void) -{ } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) -- cgit v1.2.3 From 95de5094f5ac50b6f355f4e7dffcb6f34bd5dada Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 22 Sep 2020 18:24:29 +0800 Subject: firmware: imx: add dummy functions add dummy functions to avoid build failure when header files are included, but drivers are not built. Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/ipc.h | 13 +++++++++++++ include/linux/firmware/imx/sci.h | 27 +++++++++++++++++++++++++++ include/linux/firmware/imx/svc/misc.h | 19 +++++++++++++++++++ 3 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/ipc.h b/include/linux/firmware/imx/ipc.h index 891057434858..0b4643571625 100644 --- a/include/linux/firmware/imx/ipc.h +++ b/include/linux/firmware/imx/ipc.h @@ -34,6 +34,7 @@ struct imx_sc_rpc_msg { uint8_t func; }; +#ifdef CONFIG_IMX_SCU /* * This is an function to send an RPC message over an IPC channel. * It is called by client-side SCFW API function shims. @@ -55,4 +56,16 @@ int imx_scu_call_rpc(struct imx_sc_ipc *ipc, void *msg, bool have_resp); * @return Returns an error code (0 = success, failed if < 0) */ int imx_scu_get_handle(struct imx_sc_ipc **ipc); +#else +static inline int imx_scu_call_rpc(struct imx_sc_ipc *ipc, void *msg, + bool have_resp) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_get_handle(struct imx_sc_ipc **ipc) +{ + return -ENOTSUPP; +} +#endif #endif /* _SC_IPC_H */ diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h index 22c76571a294..5cc63fe7e84d 100644 --- a/include/linux/firmware/imx/sci.h +++ b/include/linux/firmware/imx/sci.h @@ -16,9 +16,36 @@ #include #include +#if IS_ENABLED(CONFIG_IMX_SCU) int imx_scu_enable_general_irq_channel(struct device *dev); int imx_scu_irq_register_notifier(struct notifier_block *nb); int imx_scu_irq_unregister_notifier(struct notifier_block *nb); int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable); int imx_scu_soc_init(struct device *dev); +#else +static inline int imx_scu_soc_init(struct device *dev) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_enable_general_irq_channel(struct device *dev) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_irq_register_notifier(struct notifier_block *nb) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_irq_unregister_notifier(struct notifier_block *nb) +{ + return -ENOTSUPP; +} + +static inline int imx_scu_irq_group_enable(u8 group, u32 mask, u8 enable) +{ + return -ENOTSUPP; +} +#endif #endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/imx/svc/misc.h b/include/linux/firmware/imx/svc/misc.h index 031dd4d3c766..760db08a67fc 100644 --- a/include/linux/firmware/imx/svc/misc.h +++ b/include/linux/firmware/imx/svc/misc.h @@ -46,6 +46,7 @@ enum imx_misc_func { * Control Functions */ +#ifdef CONFIG_IMX_SCU int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, u32 resource, u8 ctrl, u32 val); @@ -54,5 +55,23 @@ int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource, int imx_sc_pm_cpu_start(struct imx_sc_ipc *ipc, u32 resource, bool enable, u64 phys_addr); +#else +static inline int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, + u32 resource, u8 ctrl, u32 val) +{ + return -ENOTSUPP; +} +static inline int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, + u32 resource, u8 ctrl, u32 *val) +{ + return -ENOTSUPP; +} + +static inline int imx_sc_pm_cpu_start(struct imx_sc_ipc *ipc, u32 resource, + bool enable, u64 phys_addr) +{ + return -ENOTSUPP; +} +#endif #endif /* _SC_MISC_API_H */ -- cgit v1.2.3 From 1de111b51b829bcf01d2e57971f8fd07a665fa3f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 23 Oct 2020 08:47:50 -0700 Subject: KVM: arm64: ARM_SMCCC_ARCH_WORKAROUND_1 doesn't return SMCCC_RET_NOT_REQUIRED According to the SMCCC spec[1](7.5.2 Discovery) the ARM_SMCCC_ARCH_WORKAROUND_1 function id only returns 0, 1, and SMCCC_RET_NOT_SUPPORTED. 0 is "workaround required and safe to call this function" 1 is "workaround not required but safe to call this function" SMCCC_RET_NOT_SUPPORTED is "might be vulnerable or might not be, who knows, I give up!" SMCCC_RET_NOT_SUPPORTED might as well mean "workaround required, except calling this function may not work because it isn't implemented in some cases". Wonderful. We map this SMC call to 0 is SPECTRE_MITIGATED 1 is SPECTRE_UNAFFECTED SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE For KVM hypercalls (hvc), we've implemented this function id to return SMCCC_RET_NOT_SUPPORTED, 0, and SMCCC_RET_NOT_REQUIRED. One of those isn't supposed to be there. Per the code we call arm64_get_spectre_v2_state() to figure out what to return for this feature discovery call. 0 is SPECTRE_MITIGATED SMCCC_RET_NOT_REQUIRED is SPECTRE_UNAFFECTED SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE Let's clean this up so that KVM tells the guest this mapping: 0 is SPECTRE_MITIGATED 1 is SPECTRE_UNAFFECTED SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE Note: SMCCC_RET_NOT_AFFECTED is 1 but isn't part of the SMCCC spec Fixes: c118bbb52743 ("arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests") Signed-off-by: Stephen Boyd Acked-by: Marc Zyngier Acked-by: Will Deacon Cc: Andre Przywara Cc: Steven Price Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: https://developer.arm.com/documentation/den0028/latest [1] Link: https://lore.kernel.org/r/20201023154751.1973872-1-swboyd@chromium.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 885c9ffc835c..f860645f6512 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -87,6 +87,8 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 + /* Paravirtualised time calls (defined by ARM DEN0057A) */ #define ARM_SMCCC_HV_PV_TIME_FEATURES \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -- cgit v1.2.3 From ea856ec266c1e6aecd2b107032d5b5d661f0686d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C4=8Cavoj?= Date: Wed, 21 Oct 2020 00:09:44 +0200 Subject: platform/x86: asus-wmi: Add support for SW_TABLET_MODE on UX360 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UX360CA has a WMI device id 0x00060062, which reports whether the lid is flipped in tablet mode (1) or in normal laptop mode (0). Add a quirk (quirk_asus_use_lid_flip_devid) for devices on which this WMI device should be used to figure out the SW_TABLET_MODE state, as opposed to the quirk_asus_use_kbd_dock_devid. Additionally, the device needs to be queried on resume and restore because the firmware does not generate an event if the laptop is put to sleep while in tablet mode, flipped to normal mode, and later awoken. It is assumed other UX360* models have the same WMI device. As such, the quirk is applied to devices with DMI_MATCH(DMI_PRODUCT_NAME, "UX360"). More devices with this feature need to be tested and added accordingly. The reason for using an allowlist via the quirk mechanism is that the new WMI device (0x00060062) is also present on some models which do not have a 360 degree hinge (at least FX503VD and GL503VD from Hans' DSTS collection) and therefore its presence cannot be relied on. Signed-off-by: Samuel Čavoj Cc: Hans de Goede Link: https://lore.kernel.org/r/20201020220944.1075530-1-samuel@cavoj.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/platform_data/x86/asus-wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 897b8332a39f..2f274cf52805 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -62,6 +62,7 @@ /* Misc */ #define ASUS_WMI_DEVID_CAMERA 0x00060013 +#define ASUS_WMI_DEVID_LID_FLIP 0x00060062 /* Storage */ #define ASUS_WMI_DEVID_CARDREADER 0x00080013 -- cgit v1.2.3 From cbdc0f54560f94c2205ddbebb5464d65868af0d8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:18 +0200 Subject: usb: fix kernel-doc markups There is a common comment marked, instead, with kernel-doc notation. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Acked-by: Felipe Balbi Link: https://lore.kernel.org/r/0b964be3884def04fcd20ea5c12cb90d0014871c.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 2040696d75b6..a2d229ab63ba 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -437,7 +437,7 @@ static inline struct usb_composite_driver *to_cdriver( #define OS_STRING_IDX 0xEE /** - * struct usb_composite_device - represents one composite usb gadget + * struct usb_composite_dev - represents one composite usb gadget * @gadget: read-only, abstracts the gadget's usb peripheral controller * @req: used for control responses; buffer is pre-allocated * @os_desc_req: used for OS descriptors responses; buffer is pre-allocated -- cgit v1.2.3 From da31de35cd2fb78f75788873e0b61e56d4bb1a90 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sat, 10 Oct 2020 02:47:49 +0200 Subject: tty: goldfish: use __raw_writel()/__raw_readl() gf_early_console_putchar() uses __raw_writel() but the standard driver uses writel()/readl(). This means we can't use both on the same machine as the device is either big-endian, little-endian or native-endian. As android implementation defines the endianness of the device is the one of the architecture replace all writel()/readl() by __raw_writel()/__raw_readl() https://android.googlesource.com/platform/external/qemu/+/refs/heads/emu-master-dev/hw/char/goldfish_tty.c#222 Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20201010004749.1201695-1-laurent@vivier.eu Signed-off-by: Greg Kroah-Hartman --- include/linux/goldfish.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h index 265a099cd3b8..12be1601fd84 100644 --- a/include/linux/goldfish.h +++ b/include/linux/goldfish.h @@ -13,9 +13,9 @@ static inline void gf_write_ptr(const void *ptr, void __iomem *portl, { const unsigned long addr = (unsigned long)ptr; - writel(lower_32_bits(addr), portl); + __raw_writel(lower_32_bits(addr), portl); #ifdef CONFIG_64BIT - writel(upper_32_bits(addr), porth); + __raw_writel(upper_32_bits(addr), porth); #endif } @@ -23,9 +23,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr, void __iomem *portl, void __iomem *porth) { - writel(lower_32_bits(addr), portl); + __raw_writel(lower_32_bits(addr), portl); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - writel(upper_32_bits(addr), porth); + __raw_writel(upper_32_bits(addr), porth); #endif } -- cgit v1.2.3 From caabdd0f59a9771ed095efe3ad5a08867b976ab2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Oct 2020 09:35:39 +0200 Subject: ctype.h: remove duplicate isdigit() helper gcc warns a few thousand times about the isdigit() shadow: include/linux/ctype.h:26:19: warning: declaration of 'isdigit' shadows a built-in function [-Wshadow] As there is already a compiler builtin, just use that, and make it clear we do that by defining a macro. Unfortunately, clang does not have the isdigit() builtin, so this has to be conditional. Signed-off-by: Arnd Bergmann --- include/linux/compiler_types.h | 11 +++++++++++ include/linux/ctype.h | 15 +++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6e390d58a9f8..5d41dff3f79c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -64,6 +64,17 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* Attributes */ #include +/* Builtins */ + +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. + */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + /* Compiler specific macros. */ #ifdef __clang__ #include diff --git a/include/linux/ctype.h b/include/linux/ctype.h index 363b004426db..bc95aef2219c 100644 --- a/include/linux/ctype.h +++ b/include/linux/ctype.h @@ -2,6 +2,8 @@ #ifndef _LINUX_CTYPE_H #define _LINUX_CTYPE_H +#include + /* * NOTE! This ctype does not handle EOF like the standard C * library is required to. @@ -23,10 +25,6 @@ extern const unsigned char _ctype[]; #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) #define iscntrl(c) ((__ismask(c)&(_C)) != 0) -static inline int isdigit(int c) -{ - return '0' <= c && c <= '9'; -} #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) #define islower(c) ((__ismask(c)&(_L)) != 0) #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) @@ -39,6 +37,15 @@ static inline int isdigit(int c) #define isascii(c) (((unsigned char)(c))<=0x7f) #define toascii(c) (((unsigned char)(c))&0x7f) +#if __has_builtin(__builtin_isdigit) +#define isdigit(c) __builtin_isdigit(c) +#else +static inline int isdigit(int c) +{ + return '0' <= c && c <= '9'; +} +#endif + static inline unsigned char __tolower(unsigned char c) { if (isupper(c)) -- cgit v1.2.3 From 13150bc5416f45234c955e5bed91623d178c6117 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 27 Oct 2020 16:11:32 +0100 Subject: module: use hidden visibility for weak symbol references Geert reports that commit be2881824ae9eb92 ("arm64/build: Assert for unwanted sections") results in build errors on arm64 for configurations that have CONFIG_MODULES disabled. The commit in question added ASSERT()s to the arm64 linker script to ensure that linker generated sections such as .got.plt etc are empty, but as it turns out, there are corner cases where the linker does emit content into those sections. More specifically, weak references to function symbols (which can remain unsatisfied, and can therefore not be emitted as relative references) will be emitted as GOT and PLT entries when linking the kernel in PIE mode (which is the case when CONFIG_RELOCATABLE is enabled, which is on by default). What happens is that code such as struct device *(*fn)(struct device *dev); struct device *iommu_device; fn = symbol_get(mdev_get_iommu_device); if (fn) { iommu_device = fn(dev); essentially gets converted into the following when CONFIG_MODULES is off: struct device *iommu_device; if (&mdev_get_iommu_device) { iommu_device = mdev_get_iommu_device(dev); where mdev_get_iommu_device is emitted as a weak symbol reference into the object file. The first reference is decorated with an ordinary ABS64 data relocation (which yields 0x0 if the reference remains unsatisfied). However, the indirect call is turned into a direct call covered by a R_AARCH64_CALL26 relocation, which is converted into a call via a PLT entry taking the target address from the associated GOT entry. Given that such GOT and PLT entries are unnecessary for fully linked binaries such as the kernel, let's give these weak symbol references hidden visibility, so that the linker knows that the weak reference via R_AARCH64_CALL26 can simply remain unsatisfied. Signed-off-by: Ard Biesheuvel Tested-by: Geert Uytterhoeven Reviewed-by: Fangrui Song Acked-by: Jessica Yu Cc: Jessica Yu Cc: Kees Cook Cc: Geert Uytterhoeven Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20201027151132.14066-1-ardb@kernel.org Signed-off-by: Will Deacon --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 7ccdf87f376f..6264617bab4d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -740,7 +740,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod) } /* Get/put a kernel symbol (calls should be symmetric) */ -#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) +#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) #define symbol_put(x) do { } while (0) #define symbol_put_addr(x) do { } while (0) -- cgit v1.2.3 From 39613eaad3ceff320da344427a70c655e783475e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 28 Oct 2020 12:16:10 +0530 Subject: qcom-geni-se: remove has_opp_table has_opp_table isn't used anymore, remove it. Signed-off-by: Viresh Kumar Link: https://lore.kernel.org/r/08ec1ee1d4252a266956abb5f1e0e0026d753564.1603867487.git.viresh.kumar@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/qcom-geni-se.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index f7bbea3f09ca..ec2ad4b0fe14 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -48,7 +48,6 @@ struct geni_icc_path { * @clk_perf_tbl: Table of clock frequency input to serial engine clock * @icc_paths: Array of ICC paths for SE * @opp_table: Pointer to the OPP table - * @has_opp_table: Specifies if the SE has an OPP table */ struct geni_se { void __iomem *base; @@ -59,7 +58,6 @@ struct geni_se { unsigned long *clk_perf_tbl; struct geni_icc_path icc_paths[3]; struct opp_table *opp_table; - bool has_opp_table; }; /* Common SE registers */ -- cgit v1.2.3 From f1f37abbe6fc2b1242f78157db76e48dbf9518ee Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 19 Oct 2020 15:40:46 +0200 Subject: gpio: Retire the explicit gpio irqchip code Now that all gpiolib irqchip users have been over to use the irqchip template, we can finally retire the old code path and leave just one way in to the irqchip: set up the template when registering the gpio_chip. For a while we had two code paths for this which was a bit confusing. This brings this work to a conclusion, there is now one way of doing this. Signed-off-by: Linus Walleij Reviewed-by: Andy Shevchenko Cc: Thierry Reding Link: https://lore.kernel.org/r/20201019134046.65101-1-linus.walleij@linaro.org --- include/linux/gpio/driver.h | 71 --------------------------------------------- 1 file changed, 71 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4a7e295c3640..286de0520574 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -621,83 +621,12 @@ int gpiochip_irq_domain_activate(struct irq_domain *domain, void gpiochip_irq_domain_deactivate(struct irq_domain *domain, struct irq_data *data); -void gpiochip_set_nested_irqchip(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int parent_irq); - -int gpiochip_irqchip_add_key(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type, - bool threaded, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); - bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset); int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); -#ifdef CONFIG_LOCKDEP - -/* - * Lockdep requires that each irqchip instance be created with a - * unique key so as to avoid unnecessary warnings. This upfront - * boilerplate static inlines provides such a key for each - * unique instance. - */ -static inline int gpiochip_irqchip_add(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - static struct lock_class_key lock_key; - static struct lock_class_key request_key; - - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, false, - &lock_key, &request_key); -} - -static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - - static struct lock_class_key lock_key; - static struct lock_class_key request_key; - - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, true, - &lock_key, &request_key); -} -#else /* ! CONFIG_LOCKDEP */ -static inline int gpiochip_irqchip_add(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, false, NULL, NULL); -} - -static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc, - struct irq_chip *irqchip, - unsigned int first_irq, - irq_flow_handler_t handler, - unsigned int type) -{ - return gpiochip_irqchip_add_key(gc, irqchip, first_irq, - handler, type, true, NULL, NULL); -} -#endif /* CONFIG_LOCKDEP */ - int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset); void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, -- cgit v1.2.3 From 6a6223ec7779dfdabb9c2567bb42079bc300cf27 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:13 +0100 Subject: blk-mq: docs: add kernel-doc description for a new struct member As reported by kernel-doc: ./include/linux/blk-mq.h:267: warning: Function parameter or member 'active_queues_shared_sbitmap' not described in 'blk_mq_tag_set' There is now a new member for struct blk_mq_tag_set. Add a description for it, based on the commit that introduced it. Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jens Axboe Reviewed-by: John Garry Link: https://lore.kernel.org/r/8e513153b83eefc05e358f51f2632b592c3f6772.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b23eeca4d677..794b2a33a2c3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -235,6 +235,8 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. + * @active_queues_shared_sbitmap: + * number of active request queues per tag set. * @__bitmap_tags: A shared tags sbitmap, used over all hctx's * @__breserved_tags: * A shared reserved tags sbitmap, used over all hctx's -- cgit v1.2.3 From 89b422354409c275e898d26607201797cc05a932 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:17 +0100 Subject: mm: pagemap.h: fix two kernel-doc markups Changeset a8cf7f272b5a ("mm: add find_lock_head") renamed the index parameter, but forgot to update the kernel-doc markups accordingly. Fixes: a8cf7f272b5a ("mm: add find_lock_head") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/dce89b296a4f5f9f8f798d5e76b6736c14a916ac.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c77b7c31b2e4..e1e19c1f9ec9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -344,9 +344,9 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, /** * find_lock_page - locate, pin and lock a pagecache page * @mapping: the address_space to search - * @offset: the page index + * @index: the page index * - * Looks up the page cache entry at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, it is returned locked and with an increased * refcount. * @@ -363,9 +363,9 @@ static inline struct page *find_lock_page(struct address_space *mapping, /** * find_lock_head - Locate, pin and lock a pagecache page. * @mapping: The address_space to search. - * @offset: The page index. + * @index: The page index. * - * Looks up the page cache entry at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, its head page is returned locked and with an increased * refcount. * -- cgit v1.2.3 From e86c6569c588a01f20e7554cc245f8fae831957b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:18 +0100 Subject: net: phy: remove kernel-doc duplication Sphinx 3 now checks for duplicated function declarations: .../Documentation/networking/kapi:143: ../include/linux/phy.h:163: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'unsigned int phy_supported_speeds (struct phy_device *phy, unsigned int *speeds, unsigned int size)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1034: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1076: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1088: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1100: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. It turns that both the C and the H files have the same kernel-doc markup for the same functions. Let's drop the at the header file, keeping the one closer to the code. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/75e9a357f9a716833d2094b04898754876365e68.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/phy.h | 40 +++++----------------------------------- 1 file changed, 5 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index eb3cb1a98b45..56563e5e0dc7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -147,16 +147,8 @@ typedef enum { PHY_INTERFACE_MODE_MAX, } phy_interface_t; -/** +/* * phy_supported_speeds - return all speeds currently supported by a PHY device - * @phy: The PHY device to return supported speeds of. - * @speeds: buffer to store supported speeds in. - * @size: size of speeds buffer. - * - * Description: Returns the number of supported speeds, and fills - * the speeds buffer with the supported speeds. If speeds buffer is - * too small to contain all currently supported speeds, will return as - * many speeds as can fit. */ unsigned int phy_supported_speeds(struct phy_device *phy, unsigned int *speeds, @@ -1022,14 +1014,9 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, regnum, mask, set); } -/** +/* * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for phy_read(); */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); @@ -1064,38 +1051,21 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); __ret; \ }) -/** +/* * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for __phy_read(); */ int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); -/** +/* * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for phy_write(); */ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -/** +/* * __phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for __phy_write(); */ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -- cgit v1.2.3 From cf38cc9f1e71151f22584c40357afaab6609384b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:23 +0100 Subject: locking/refcount: move kernel-doc markups to the proper place Changeset a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") added a set of functions starting with __ that have a new parameter, adding a series of new warnings: $ ./scripts/kernel-doc -none include/linux/refcount.h include/linux/refcount.h:169: warning: Function parameter or member 'oldp' not described in '__refcount_add_not_zero' include/linux/refcount.h:208: warning: Function parameter or member 'oldp' not described in '__refcount_add' include/linux/refcount.h:239: warning: Function parameter or member 'oldp' not described in '__refcount_inc_not_zero' include/linux/refcount.h:261: warning: Function parameter or member 'oldp' not described in '__refcount_inc' include/linux/refcount.h:291: warning: Function parameter or member 'oldp' not described in '__refcount_sub_and_test' include/linux/refcount.h:327: warning: Function parameter or member 'oldp' not described in '__refcount_dec_and_test' include/linux/refcount.h:347: warning: Function parameter or member 'oldp' not described in '__refcount_dec' The issue is that the kernel-doc markups are now misplaced, as they should be added just before the functions. So, move the kernel-doc markups to the proper places, in order to drop the warnings. It should be noticed that git show produces a crappy output, for this patch without "--patience" flag. Fixes: a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/7985c31d1ace591bc5e1faa05c367f1295b78afd.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/refcount.h | 130 +++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 7fabb1af18e0..497990c69b0b 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -147,24 +147,6 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -/** - * refcount_add_not_zero - add a value to a refcount unless it is 0 - * @i: the value to add to the refcount - * @r: the refcount - * - * Will saturate at REFCOUNT_SATURATED and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - * - * Use of this function is not recommended for the normal reference counting - * use case in which references are taken and released one at a time. In these - * cases, refcount_inc(), or one of its variants, should instead be used to - * increment a reference count. - * - * Return: false if the passed refcount is 0, true otherwise - */ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -183,17 +165,12 @@ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, in return old; } -static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) -{ - return __refcount_add_not_zero(i, r, NULL); -} - /** - * refcount_add - add a value to a refcount + * refcount_add_not_zero - add a value to a refcount unless it is 0 * @i: the value to add to the refcount * @r: the refcount * - * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * Will saturate at REFCOUNT_SATURATED and WARN. * * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). It does provide a control dependency @@ -203,7 +180,14 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) * use case in which references are taken and released one at a time. In these * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise */ +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + return __refcount_add_not_zero(i, r, NULL); +} + static inline void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -217,11 +201,32 @@ static inline void __refcount_add(int i, refcount_t *r, int *oldp) refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ static inline void refcount_add(int i, refcount_t *r) { __refcount_add(i, r, NULL); } +static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero(1, r, oldp); +} + /** * refcount_inc_not_zero - increment a refcount unless it is 0 * @r: the refcount to increment @@ -235,14 +240,14 @@ static inline void refcount_add(int i, refcount_t *r) * * Return: true if the increment was successful, false otherwise */ -static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { - return __refcount_add_not_zero(1, r, oldp); + return __refcount_inc_not_zero(r, NULL); } -static inline __must_check bool refcount_inc_not_zero(refcount_t *r) +static inline void __refcount_inc(refcount_t *r, int *oldp) { - return __refcount_inc_not_zero(r, NULL); + __refcount_add(1, r, oldp); } /** @@ -257,14 +262,27 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. */ -static inline void __refcount_inc(refcount_t *r, int *oldp) +static inline void refcount_inc(refcount_t *r) { - __refcount_add(1, r, oldp); + __refcount_inc(r, NULL); } -static inline void refcount_inc(refcount_t *r) +static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { - __refcount_inc(r, NULL); + int old = atomic_fetch_sub_release(i, &r->refs); + + if (oldp) + *oldp = old; + + if (old == i) { + smp_acquire__after_ctrl_dep(); + return true; + } + + if (unlikely(old < 0 || old - i < 0)) + refcount_warn_saturate(r, REFCOUNT_SUB_UAF); + + return false; } /** @@ -287,27 +305,14 @@ static inline void refcount_inc(refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { - int old = atomic_fetch_sub_release(i, &r->refs); - - if (oldp) - *oldp = old; - - if (old == i) { - smp_acquire__after_ctrl_dep(); - return true; - } - - if (unlikely(old < 0 || old - i < 0)) - refcount_warn_saturate(r, REFCOUNT_SUB_UAF); - - return false; + return __refcount_sub_and_test(i, r, NULL); } -static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) +static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) { - return __refcount_sub_and_test(i, r, NULL); + return __refcount_sub_and_test(1, r, oldp); } /** @@ -323,26 +328,11 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) -{ - return __refcount_sub_and_test(1, r, oldp); -} - static inline __must_check bool refcount_dec_and_test(refcount_t *r) { return __refcount_dec_and_test(r, NULL); } -/** - * refcount_dec - decrement a refcount - * @r: the refcount - * - * Similar to atomic_dec(), it will WARN on underflow and fail to decrement - * when saturated at REFCOUNT_SATURATED. - * - * Provides release memory ordering, such that prior loads and stores are done - * before. - */ static inline void __refcount_dec(refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(1, &r->refs); @@ -354,6 +344,16 @@ static inline void __refcount_dec(refcount_t *r, int *oldp) refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ static inline void refcount_dec(refcount_t *r) { __refcount_dec(r, NULL); -- cgit v1.2.3 From e029c5f2798720b463e8df0e184a4d1036311b43 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Mon, 26 Oct 2020 21:49:14 -0700 Subject: ext4: make num of fast commit blocks configurable This patch reserves a field in the jbd2 superblock for number of fast commit blocks. When this value is non-zero, Ext4 uses this field to set the number of fast commit blocks. Fixes: 6866d7b3f2bb ("ext4/jbd2: add fast commit initialization") Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201027044915.2553163-2-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fb3d71ad6eea..7e88bbc16ffb 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -263,7 +263,10 @@ typedef struct journal_superblock_s /* 0x0050 */ __u8 s_checksum_type; /* checksum type */ __u8 s_padding2[3]; - __u32 s_padding[42]; +/* 0x0054 */ + __be32 s_num_fc_blks; /* Number of fast commit blocks */ +/* 0x0058 */ + __u32 s_padding[41]; __be32 s_checksum; /* crc32c(superblock) */ /* 0x0100 */ -- cgit v1.2.3 From ea4b01d9b81f5f381fc6832bc31046878a2d1a5d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:27 +0100 Subject: jbd2: fix a kernel-doc markup The kernel-doc markup that documents _fc_replay_callback is missing an asterisk, causing this warning: ../include/linux/jbd2.h:1271: warning: Function parameter or member 'j_fc_replay_callback' not described in 'journal_s' When building the docs. Fixes: 609f928af48f ("jbd2: fast commit recovery path") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6055927ada2015b55b413cdd2670533bdc9a8da2.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 7e88bbc16ffb..1d5566af48ac 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1256,7 +1256,7 @@ struct journal_s */ void (*j_fc_cleanup_callback)(struct journal_s *journal, int); - /* + /** * @j_fc_replay_callback: * * File-system specific function that performs replay of a fast -- cgit v1.2.3 From 80ade22c06ca115b81dd168e99479c8e09843513 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Tue, 27 Oct 2020 20:14:15 -0700 Subject: misc: mic: remove the MIC drivers This patch removes the MIC drivers from the kernel tree since the corresponding devices have been discontinued. Removing the dma and char-misc changes in one patch and merging via the char-misc tree is best to avoid any potential build breakage. Cc: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Sudeep Dutt Acked-By: Vinod Koul Reviewed-by: Sherry Sun Link: https://lore.kernel.org/r/8c1443136563de34699d2c084df478181c205db4.1603854416.git.sudeep.dutt@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mic_bus.h | 100 ---- include/linux/scif.h | 1339 ----------------------------------------------- 2 files changed, 1439 deletions(-) delete mode 100644 include/linux/mic_bus.h delete mode 100644 include/linux/scif.h (limited to 'include/linux') diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h deleted file mode 100644 index e99c789424e0..000000000000 --- a/include/linux/mic_bus.h +++ /dev/null @@ -1,100 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2014 Intel Corporation. - * - * Intel MIC Bus driver. - * - * This implementation is very similar to the virtio bus driver - * implementation @ include/linux/virtio.h. - */ -#ifndef _MIC_BUS_H_ -#define _MIC_BUS_H_ -/* - * Everything a mbus driver needs to work with any particular mbus - * implementation. - */ -#include -#include - -struct mbus_device_id { - __u32 device; - __u32 vendor; -}; - -#define MBUS_DEV_DMA_HOST 2 -#define MBUS_DEV_DMA_MIC 3 -#define MBUS_DEV_ANY_ID 0xffffffff - -/** - * mbus_device - representation of a device using mbus - * @mmio_va: virtual address of mmio space - * @hw_ops: the hardware ops supported by this device. - * @id: the device type identification (used to match it with a driver). - * @dev: underlying device. - * be used to communicate with. - * @index: unique position on the mbus bus - */ -struct mbus_device { - void __iomem *mmio_va; - struct mbus_hw_ops *hw_ops; - struct mbus_device_id id; - struct device dev; - int index; -}; - -/** - * mbus_driver - operations for a mbus I/O driver - * @driver: underlying device driver (populate name and owner). - * @id_table: the ids serviced by this driver. - * @probe: the function to call when a device is found. Returns 0 or -errno. - * @remove: the function to call when a device is removed. - */ -struct mbus_driver { - struct device_driver driver; - const struct mbus_device_id *id_table; - int (*probe)(struct mbus_device *dev); - void (*scan)(struct mbus_device *dev); - void (*remove)(struct mbus_device *dev); -}; - -/** - * struct mic_irq - opaque pointer used as cookie - */ -struct mic_irq; - -/** - * mbus_hw_ops - Hardware operations for accessing a MIC device on the MIC bus. - */ -struct mbus_hw_ops { - struct mic_irq* (*request_threaded_irq)(struct mbus_device *mbdev, - irq_handler_t handler, - irq_handler_t thread_fn, - const char *name, void *data, - int intr_src); - void (*free_irq)(struct mbus_device *mbdev, - struct mic_irq *cookie, void *data); - void (*ack_interrupt)(struct mbus_device *mbdev, int num); -}; - -struct mbus_device * -mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, - struct mbus_hw_ops *hw_ops, int index, - void __iomem *mmio_va); -void mbus_unregister_device(struct mbus_device *mbdev); - -int mbus_register_driver(struct mbus_driver *drv); -void mbus_unregister_driver(struct mbus_driver *drv); - -static inline struct mbus_device *dev_to_mbus(struct device *_dev) -{ - return container_of(_dev, struct mbus_device, dev); -} - -static inline struct mbus_driver *drv_to_mbus(struct device_driver *drv) -{ - return container_of(drv, struct mbus_driver, driver); -} - -#endif /* _MIC_BUS_H */ diff --git a/include/linux/scif.h b/include/linux/scif.h deleted file mode 100644 index 329e695b8fe5..000000000000 --- a/include/linux/scif.h +++ /dev/null @@ -1,1339 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel SCIF driver. - * - */ -#ifndef __SCIF_H__ -#define __SCIF_H__ - -#include -#include -#include -#include - -#define SCIF_ACCEPT_SYNC 1 -#define SCIF_SEND_BLOCK 1 -#define SCIF_RECV_BLOCK 1 - -enum { - SCIF_PROT_READ = (1 << 0), - SCIF_PROT_WRITE = (1 << 1) -}; - -enum { - SCIF_MAP_FIXED = 0x10, - SCIF_MAP_KERNEL = 0x20, -}; - -enum { - SCIF_FENCE_INIT_SELF = (1 << 0), - SCIF_FENCE_INIT_PEER = (1 << 1), - SCIF_SIGNAL_LOCAL = (1 << 4), - SCIF_SIGNAL_REMOTE = (1 << 5) -}; - -enum { - SCIF_RMA_USECPU = (1 << 0), - SCIF_RMA_USECACHE = (1 << 1), - SCIF_RMA_SYNC = (1 << 2), - SCIF_RMA_ORDERED = (1 << 3) -}; - -/* End of SCIF Admin Reserved Ports */ -#define SCIF_ADMIN_PORT_END 1024 - -/* End of SCIF Reserved Ports */ -#define SCIF_PORT_RSVD 1088 - -typedef struct scif_endpt *scif_epd_t; -typedef struct scif_pinned_pages *scif_pinned_pages_t; - -/** - * struct scif_range - SCIF registered range used in kernel mode - * @cookie: cookie used internally by SCIF - * @nr_pages: number of pages of PAGE_SIZE - * @prot_flags: R/W protection - * @phys_addr: Array of bus addresses - * @va: Array of kernel virtual addresses backed by the pages in the phys_addr - * array. The va is populated only when called on the host for a remote - * SCIF connection on MIC. This is required to support the use case of DMA - * between MIC and another device which is not a SCIF node e.g., an IB or - * ethernet NIC. - */ -struct scif_range { - void *cookie; - int nr_pages; - int prot_flags; - dma_addr_t *phys_addr; - void __iomem **va; -}; - -/** - * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll - * @epd: SCIF endpoint - * @events: requested events - * @revents: returned events - */ -struct scif_pollepd { - scif_epd_t epd; - __poll_t events; - __poll_t revents; -}; - -/** - * scif_peer_dev - representation of a peer SCIF device - * - * Peer devices show up as PCIe devices for the mgmt node but not the cards. - * The mgmt node discovers all the cards on the PCIe bus and informs the other - * cards about their peers. Upon notification of a peer a node adds a peer - * device to the peer bus to maintain symmetry in the way devices are - * discovered across all nodes in the SCIF network. - * - * @dev: underlying device - * @dnode - The destination node which this device will communicate with. - */ -struct scif_peer_dev { - struct device dev; - u8 dnode; -}; - -/** - * scif_client - representation of a SCIF client - * @name: client name - * @probe - client method called when a peer device is registered - * @remove - client method called when a peer device is unregistered - * @si - subsys_interface used internally for implementing SCIF clients - */ -struct scif_client { - const char *name; - void (*probe)(struct scif_peer_dev *spdev); - void (*remove)(struct scif_peer_dev *spdev); - struct subsys_interface si; -}; - -#define SCIF_OPEN_FAILED ((scif_epd_t)-1) -#define SCIF_REGISTER_FAILED ((off_t)-1) -#define SCIF_MMAP_FAILED ((void *)-1) - -/** - * scif_open() - Create an endpoint - * - * Return: - * Upon successful completion, scif_open() returns an endpoint descriptor to - * be used in subsequent SCIF functions calls to refer to that endpoint; - * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is - * returned and errno is set to indicate the error; in kernel mode a NULL - * scif_epd_t is returned. - * - * Errors: - * ENOMEM - Insufficient kernel memory was available - */ -scif_epd_t scif_open(void); - -/** - * scif_bind() - Bind an endpoint to a port - * @epd: endpoint descriptor - * @pn: port number - * - * scif_bind() binds endpoint epd to port pn, where pn is a port number on the - * local node. If pn is zero, a port number greater than or equal to - * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to - * exactly one local port. Ports less than 1024 when requested can only be bound - * by system (or root) processes or by processes executed by privileged users. - * - * Return: - * Upon successful completion, scif_bind() returns the port number to which epd - * is bound; otherwise in user mode -1 is returned and errno is set to - * indicate the error; in kernel mode the negative of one of the following - * errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * EINVAL - the endpoint or the port is already bound - * EISCONN - The endpoint is already connected - * ENOSPC - No port number available for assignment - * EACCES - The port requested is protected and the user is not the superuser - */ -int scif_bind(scif_epd_t epd, u16 pn); - -/** - * scif_listen() - Listen for connections on an endpoint - * @epd: endpoint descriptor - * @backlog: maximum pending connection requests - * - * scif_listen() marks the endpoint epd as a listening endpoint - that is, as - * an endpoint that will be used to accept incoming connection requests. Once - * so marked, the endpoint is said to be in the listening state and may not be - * used as the endpoint of a connection. - * - * The endpoint, epd, must have been bound to a port. - * - * The backlog argument defines the maximum length to which the queue of - * pending connections for epd may grow. If a connection request arrives when - * the queue is full, the client may receive an error with an indication that - * the connection was refused. - * - * Return: - * Upon successful completion, scif_listen() returns 0; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode the - * negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * EINVAL - the endpoint is not bound to a port - * EISCONN - The endpoint is already connected or listening - */ -int scif_listen(scif_epd_t epd, int backlog); - -/** - * scif_connect() - Initiate a connection on a port - * @epd: endpoint descriptor - * @dst: global id of port to which to connect - * - * The scif_connect() function requests the connection of endpoint epd to remote - * port dst. If the connection is successful, a peer endpoint, bound to dst, is - * created on node dst.node. On successful return, the connection is complete. - * - * If the endpoint epd has not already been bound to a port, scif_connect() - * will bind it to an unused local port. - * - * A connection is terminated when an endpoint of the connection is closed, - * either explicitly by scif_close(), or when a process that owns one of the - * endpoints of the connection is terminated. - * - * In user space, scif_connect() supports an asynchronous connection mode - * if the application has set the O_NONBLOCK flag on the endpoint via the - * fcntl() system call. Setting this flag will result in the calling process - * not to wait during scif_connect(). - * - * Return: - * Upon successful completion, scif_connect() returns the port ID to which the - * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is - * set to indicate the error; in kernel mode the negative of one of the - * following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNREFUSED - The destination was not listening for connections or refused - * the connection request - * EINVAL - dst.port is not a valid port ID - * EISCONN - The endpoint is already connected - * ENOMEM - No buffer space is available - * ENODEV - The destination node does not exist, or the node is lost or existed, - * but is not currently in the network since it may have crashed - * ENOSPC - No port number available for assignment - * EOPNOTSUPP - The endpoint is listening and cannot be connected - */ -int scif_connect(scif_epd_t epd, struct scif_port_id *dst); - -/** - * scif_accept() - Accept a connection on an endpoint - * @epd: endpoint descriptor - * @peer: global id of port to which connected - * @newepd: new connected endpoint descriptor - * @flags: flags - * - * The scif_accept() call extracts the first connection request from the queue - * of pending connections for the port on which epd is listening. scif_accept() - * creates a new endpoint, bound to the same port as epd, and allocates a new - * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new - * endpoint is connected to the endpoint through which the connection was - * requested. epd is unaffected by this call, and remains in the listening - * state. - * - * On successful return, peer holds the global port identifier (node id and - * local port number) of the port which requested the connection. - * - * A connection is terminated when an endpoint of the connection is closed, - * either explicitly by scif_close(), or when a process that owns one of the - * endpoints of the connection is terminated. - * - * The number of connections that can (subsequently) be accepted on epd is only - * limited by system resources (memory). - * - * The flags argument is formed by OR'ing together zero or more of the - * following values. - * SCIF_ACCEPT_SYNC - block until a connection request is presented. If - * SCIF_ACCEPT_SYNC is not in flags, and no pending - * connections are present on the queue, scif_accept() - * fails with an EAGAIN error - * - * In user mode, the select() and poll() functions can be used to determine - * when there is a connection request. In kernel mode, the scif_poll() - * function may be used for this purpose. A readable event will be delivered - * when a connection is requested. - * - * Return: - * Upon successful completion, scif_accept() returns 0; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode the - * negative of one of the following errors is returned. - * - * Errors: - * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be - * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete - * its connection request - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * EINTR - Interrupted function - * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is - * NULL, or newepd is NULL - * ENODEV - The requesting node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOENT - Secondary part of epd registration failed - */ -int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t - *newepd, int flags); - -/** - * scif_close() - Close an endpoint - * @epd: endpoint descriptor - * - * scif_close() closes an endpoint and performs necessary teardown of - * facilities associated with that endpoint. - * - * If epd is a listening endpoint then it will no longer accept connection - * requests on the port to which it is bound. Any pending connection requests - * are rejected. - * - * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs - * which are in-process through epd or its peer endpoint will complete before - * scif_close() returns. Registered windows of the local and peer endpoints are - * released as if scif_unregister() was called against each window. - * - * Closing a SCIF endpoint does not affect local registered memory mapped by - * a SCIF endpoint on a remote node. The local memory remains mapped by the peer - * SCIF endpoint explicitly removed by calling munmap(..) by the peer. - * - * If the peer endpoint's receive queue is not empty at the time that epd is - * closed, then the peer endpoint can be passed as the endpoint parameter to - * scif_recv() until the receive queue is empty. - * - * epd is freed and may no longer be accessed. - * - * Return: - * Upon successful completion, scif_close() returns 0; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode the - * negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - */ -int scif_close(scif_epd_t epd); - -/** - * scif_send() - Send a message - * @epd: endpoint descriptor - * @msg: message buffer address - * @len: message length - * @flags: blocking mode flags - * - * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data - * are copied from memory starting at address msg. On successful execution the - * return value of scif_send() is the number of bytes that were sent, and is - * zero if no bytes were sent because len was zero. scif_send() may be called - * only when the endpoint is in a connected state. - * - * If a scif_send() call is non-blocking, then it sends only those bytes which - * can be sent without waiting, up to a maximum of len bytes. - * - * If a scif_send() call is blocking, then it normally returns after sending - * all len bytes. If a blocking call is interrupted or the connection is - * reset, the call is considered successful if some bytes were sent or len is - * zero, otherwise the call is considered unsuccessful. - * - * In user mode, the select() and poll() functions can be used to determine - * when the send queue is not full. In kernel mode, the scif_poll() function - * may be used for this purpose. - * - * It is recommended that scif_send()/scif_recv() only be used for short - * control-type message communication between SCIF endpoints. The SCIF RMA - * APIs are expected to provide better performance for transfer sizes of - * 1024 bytes or longer for the current MIC hardware and software - * implementation. - * - * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK - * is passed as the flags argument. - * - * Return: - * Upon successful completion, scif_send() returns the number of bytes sent; - * otherwise in user mode -1 is returned and errno is set to indicate the - * error; in kernel mode the negative of one of the following errors is - * returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid, or len is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN - The endpoint is not connected - */ -int scif_send(scif_epd_t epd, void *msg, int len, int flags); - -/** - * scif_recv() - Receive a message - * @epd: endpoint descriptor - * @msg: message buffer address - * @len: message buffer length - * @flags: blocking mode flags - * - * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of - * data are copied to memory starting at address msg. On successful execution - * the return value of scif_recv() is the number of bytes that were received, - * and is zero if no bytes were received because len was zero. scif_recv() may - * be called only when the endpoint is in a connected state. - * - * If a scif_recv() call is non-blocking, then it receives only those bytes - * which can be received without waiting, up to a maximum of len bytes. - * - * If a scif_recv() call is blocking, then it normally returns after receiving - * all len bytes. If the blocking call was interrupted due to a disconnection, - * subsequent calls to scif_recv() will copy all bytes received upto the point - * of disconnection. - * - * In user mode, the select() and poll() functions can be used to determine - * when data is available to be received. In kernel mode, the scif_poll() - * function may be used for this purpose. - * - * It is recommended that scif_send()/scif_recv() only be used for short - * control-type message communication between SCIF endpoints. The SCIF RMA - * APIs are expected to provide better performance for transfer sizes of - * 1024 bytes or longer for the current MIC hardware and software - * implementation. - * - * scif_recv() will block until the entire message is received if - * SCIF_RECV_BLOCK is passed as the flags argument. - * - * Return: - * Upon successful completion, scif_recv() returns the number of bytes - * received; otherwise in user mode -1 is returned and errno is set to - * indicate the error; in kernel mode the negative of one of the following - * errors is returned. - * - * Errors: - * EAGAIN - The destination node is returning from a low power state - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid, or len is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN - The endpoint is not connected - */ -int scif_recv(scif_epd_t epd, void *msg, int len, int flags); - -/** - * scif_register() - Mark a memory region for remote access. - * @epd: endpoint descriptor - * @addr: starting virtual address - * @len: length of range - * @offset: offset of window - * @prot_flags: read/write protection flags - * @map_flags: mapping flags - * - * The scif_register() function opens a window, a range of whole pages of the - * registered address space of the endpoint epd, starting at offset po and - * continuing for len bytes. The value of po, further described below, is a - * function of the parameters offset and len, and the value of map_flags. Each - * page of the window represents the physical memory page which backs the - * corresponding page of the range of virtual address pages starting at addr - * and continuing for len bytes. addr and len are constrained to be multiples - * of the page size. A successful scif_register() call returns po. - * - * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset - * exactly, and offset is constrained to be a multiple of the page size. The - * mapping established by scif_register() will not replace any existing - * registration; an error is returned if any page within the range [offset, - * offset + len - 1] intersects an existing window. - * - * When SCIF_MAP_FIXED is not set, the implementation uses offset in an - * implementation-defined manner to arrive at po. The po value so chosen will - * be an area of the registered address space that the implementation deems - * suitable for a mapping of len bytes. An offset value of 0 is interpreted as - * granting the implementation complete freedom in selecting po, subject to - * constraints described below. A non-zero value of offset is taken to be a - * suggestion of an offset near which the mapping should be placed. When the - * implementation selects a value for po, it does not replace any extant - * window. In all cases, po will be a multiple of the page size. - * - * The physical pages which are so represented by a window are available for - * access in calls to mmap(), scif_readfrom(), scif_writeto(), - * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the - * physical pages represented by the window will not be reused by the memory - * subsystem for any other purpose. Note that the same physical page may be - * represented by multiple windows. - * - * Subsequent operations which change the memory pages to which virtual - * addresses are mapped (such as mmap(), munmap()) have no effect on - * existing window. - * - * If the process will fork(), it is recommended that the registered - * virtual address range be marked with MADV_DONTFORK. Doing so will prevent - * problems due to copy-on-write semantics. - * - * The prot_flags argument is formed by OR'ing together one or more of the - * following values. - * SCIF_PROT_READ - allow read operations from the window - * SCIF_PROT_WRITE - allow write operations to the window - * - * Return: - * Upon successful completion, scif_register() returns the offset at which the - * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that - * is (off_t *)-1) is returned and errno is set to indicate the error; in - * kernel mode the negative of one of the following errors is returned. - * - * Errors: - * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range - * [offset, offset + len -1] are already registered - * EAGAIN - The mapping could not be performed due to lack of resources - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is - * set in flags, and offset is not a multiple of the page size, or addr is not a - * multiple of the page size, or len is not a multiple of the page size, or is - * 0, or offset is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN -The endpoint is not connected - */ -off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, - int prot_flags, int map_flags); - -/** - * scif_unregister() - Mark a memory region for remote access. - * @epd: endpoint descriptor - * @offset: start of range to unregister - * @len: length of range to unregister - * - * The scif_unregister() function closes those previously registered windows - * which are entirely within the range [offset, offset + len - 1]. It is an - * error to specify a range which intersects only a subrange of a window. - * - * On a successful return, pages within the window may no longer be specified - * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), - * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, - * however, continues to exist until all previous references against it are - * removed. A window is referenced if there is a mapping to it created by - * mmap(), or if scif_get_pages() was called against the window - * (and the pages have not been returned via scif_put_pages()). A window is - * also referenced while an RMA, in which some range of the window is a source - * or destination, is in progress. Finally a window is referenced while some - * offset in that window was specified to scif_fence_signal(), and the RMAs - * marked by that call to scif_fence_signal() have not completed. While a - * window is in this state, its registered address space pages are not - * available for use in a new registered window. - * - * When all such references to the window have been removed, its references to - * all the physical pages which it represents are removed. Similarly, the - * registered address space pages of the window become available for - * registration in a new window. - * - * Return: - * Upon successful completion, scif_unregister() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. In the event of an - * error, no windows are unregistered. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a - * window, or offset is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the - * registered address space of epd - */ -int scif_unregister(scif_epd_t epd, off_t offset, size_t len); - -/** - * scif_readfrom() - Copy from a remote address space - * @epd: endpoint descriptor - * @loffset: offset in local registered address space to - * which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space - * from which to copy - * @rma_flags: transfer mode flags - * - * scif_readfrom() copies len bytes from the remote registered address space of - * the peer of endpoint epd, starting at the offset roffset to the local - * registered address space of epd, starting at the offset loffset. - * - * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, - * roffset + len - 1] must be within some registered window or windows of the - * local and remote nodes. A range may intersect multiple registered windows, - * but only if those windows are contiguous in the registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * The optimal DMA performance will likely be realized if both - * loffset and roffset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if loffset and roffset are not - * cacheline aligned but are separated by some multiple of 64. The lowest level - * of performance is likely if loffset and roffset are not separated by a - * multiple of 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_readfrom() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered - * address space of epd, or, The range [roffset, roffset + len - 1] is invalid - * for the registered address space of the peer of epd, or loffset or roffset - * is negative - */ -int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t - roffset, int rma_flags); - -/** - * scif_writeto() - Copy to a remote address space - * @epd: endpoint descriptor - * @loffset: offset in local registered address space - * from which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space to - * which to copy - * @rma_flags: transfer mode flags - * - * scif_writeto() copies len bytes from the local registered address space of - * epd, starting at the offset loffset to the remote registered address space - * of the peer of endpoint epd, starting at the offset roffset. - * - * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, - * roffset + len - 1] must be within some registered window or windows of the - * local and remote nodes. A range may intersect multiple registered windows, - * but only if those windows are contiguous in the registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * The optimal DMA performance will likely be realized if both - * loffset and roffset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if loffset and roffset are not cacheline - * aligned but are separated by some multiple of 64. The lowest level of - * performance is likely if loffset and roffset are not separated by a multiple - * of 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_readfrom() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered - * address space of epd, or, The range [roffset , roffset + len -1] is invalid - * for the registered address space of the peer of epd, or loffset or roffset - * is negative - */ -int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t - roffset, int rma_flags); - -/** - * scif_vreadfrom() - Copy from a remote address space - * @epd: endpoint descriptor - * @addr: address to which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space - * from which to copy - * @rma_flags: transfer mode flags - * - * scif_vreadfrom() copies len bytes from the remote registered address - * space of the peer of endpoint epd, starting at the offset roffset, to local - * memory, starting at addr. - * - * The specified range [roffset, roffset + len - 1] must be within some - * registered window or windows of the remote nodes. The range may - * intersect multiple registered windows, but only if those windows are - * contiguous in the registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back - * the specified local memory range may be remain in a pinned state even after - * the specified transfer completes. This may reduce overhead if some or all of - * the same virtual address range is referenced in a subsequent call of - * scif_vreadfrom() or scif_vwriteto(). - * - * The optimal DMA performance will likely be realized if both - * addr and roffset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if addr and roffset are not - * cacheline aligned but are separated by some multiple of 64. The lowest level - * of performance is likely if addr and roffset are not separated by a - * multiple of 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_USECACHE - enable registration caching - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the - * registered address space of epd - */ -int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, - int rma_flags); - -/** - * scif_vwriteto() - Copy to a remote address space - * @epd: endpoint descriptor - * @addr: address from which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space to - * which to copy - * @rma_flags: transfer mode flags - * - * scif_vwriteto() copies len bytes from the local memory, starting at addr, to - * the remote registered address space of the peer of endpoint epd, starting at - * the offset roffset. - * - * The specified range [roffset, roffset + len - 1] must be within some - * registered window or windows of the remote nodes. The range may intersect - * multiple registered windows, but only if those windows are contiguous in the - * registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back - * the specified local memory range may be remain in a pinned state even after - * the specified transfer completes. This may reduce overhead if some or all of - * the same virtual address range is referenced in a subsequent call of - * scif_vreadfrom() or scif_vwriteto(). - * - * The optimal DMA performance will likely be realized if both - * addr and offset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if addr and offset are not cacheline - * aligned but are separated by some multiple of 64. The lowest level of - * performance is likely if addr and offset are not separated by a multiple of - * 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_USECACHE - allow registration caching - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_vwriteto() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the - * registered address space of epd - */ -int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, - int rma_flags); - -/** - * scif_fence_mark() - Mark previously issued RMAs - * @epd: endpoint descriptor - * @flags: control flags - * @mark: marked value returned as output. - * - * scif_fence_mark() returns after marking the current set of all uncompleted - * RMAs initiated through the endpoint epd or the current set of all - * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are - * marked with a value returned at mark. The application may subsequently call - * scif_fence_wait(), passing the value returned at mark, to await completion - * of all RMAs so marked. - * - * The flags argument has exactly one of the following values. - * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint - * epd are marked - * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer - * of endpoint epd are marked - * - * Return: - * Upon successful completion, scif_fence_mark() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENOMEM - Insufficient kernel memory was available - */ -int scif_fence_mark(scif_epd_t epd, int flags, int *mark); - -/** - * scif_fence_wait() - Wait for completion of marked RMAs - * @epd: endpoint descriptor - * @mark: mark request - * - * scif_fence_wait() returns after all RMAs marked with mark have completed. - * The value passed in mark must have been obtained in a previous call to - * scif_fence_mark(). - * - * Return: - * Upon successful completion, scif_fence_wait() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENOMEM - Insufficient kernel memory was available - */ -int scif_fence_wait(scif_epd_t epd, int mark); - -/** - * scif_fence_signal() - Request a memory update on completion of RMAs - * @epd: endpoint descriptor - * @loff: local offset - * @lval: local value to write to loffset - * @roff: remote offset - * @rval: remote value to write to roffset - * @flags: flags - * - * scif_fence_signal() returns after marking the current set of all uncompleted - * RMAs initiated through the endpoint epd or marking the current set of all - * uncompleted RMAs initiated through the peer of endpoint epd. - * - * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the - * marked set, lval is written to memory at the address corresponding to offset - * loff in the local registered address space of epd. loff must be within a - * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion - * of the RMAs in the marked set, rval is written to memory at the address - * corresponding to offset roff in the remote registered address space of epd. - * roff must be within a remote registered window of the peer of epd. Note - * that any specified offset must be DWORD (4 byte / 32 bit) aligned. - * - * The flags argument is formed by OR'ing together the following. - * Exactly one of the following values. - * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint - * epd are marked - * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer - * of endpoint epd are marked - * One or more of the following values. - * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to - * memory at the address corresponding to offset loff in the local - * registered address space of epd. - * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to - * memory at the address corresponding to offset roff in the remote - * registered address space of epd. - * - * Return: - * Upon successful completion, scif_fence_signal() returns 0; otherwise in - * user mode -1 is returned and errno is set to indicate the error; in kernel - * mode the negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid, or loff or roff are not DWORD aligned - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - loff is invalid for the registered address of epd, or roff is invalid - * for the registered address space, of the peer of epd - */ -int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, - u64 rval, int flags); - -/** - * scif_get_node_ids() - Return information about online nodes - * @nodes: array in which to return online node IDs - * @len: number of entries in the nodes array - * @self: address to place the node ID of the local node - * - * scif_get_node_ids() fills in the nodes array with up to len node IDs of the - * nodes in the SCIF network. If there is not enough space in nodes, as - * indicated by the len parameter, only len node IDs are returned in nodes. The - * return value of scif_get_node_ids() is the total number of nodes currently in - * the SCIF network. By checking the return value against the len parameter, - * the user may determine if enough space for nodes was allocated. - * - * The node ID of the local node is returned at self. - * - * Return: - * Upon successful completion, scif_get_node_ids() returns the actual number of - * online nodes in the SCIF network including 'self'; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode no - * errors are returned. - */ -int scif_get_node_ids(u16 *nodes, int len, u16 *self); - -/** - * scif_pin_pages() - Pin a set of pages - * @addr: Virtual address of range to pin - * @len: Length of range to pin - * @prot_flags: Page protection flags - * @map_flags: Page classification flags - * @pinned_pages: Handle to pinned pages - * - * scif_pin_pages() pins (locks in physical memory) the physical pages which - * back the range of virtual address pages starting at addr and continuing for - * len bytes. addr and len are constrained to be multiples of the page size. A - * successful scif_pin_pages() call returns a handle to pinned_pages which may - * be used in subsequent calls to scif_register_pinned_pages(). - * - * The pages will remain pinned as long as there is a reference against the - * scif_pinned_pages_t value returned by scif_pin_pages() and until - * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A - * reference is added to a scif_pinned_pages_t value each time a window is - * created by calling scif_register_pinned_pages() and passing the - * scif_pinned_pages_t value. A reference is removed from a - * scif_pinned_pages_t value each time such a window is deleted. - * - * Subsequent operations which change the memory pages to which virtual - * addresses are mapped (such as mmap(), munmap()) have no effect on the - * scif_pinned_pages_t value or windows created against it. - * - * If the process will fork(), it is recommended that the registered - * virtual address range be marked with MADV_DONTFORK. Doing so will prevent - * problems due to copy-on-write semantics. - * - * The prot_flags argument is formed by OR'ing together one or more of the - * following values. - * SCIF_PROT_READ - allow read operations against the pages - * SCIF_PROT_WRITE - allow write operations against the pages - * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a - * kernel space address. By default, addr is interpreted as a user space - * address. - * - * Return: - * Upon successful completion, scif_pin_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * - * Errors: - * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative - * ENOMEM - Not enough space - */ -int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags, - scif_pinned_pages_t *pinned_pages); - -/** - * scif_unpin_pages() - Unpin a set of pages - * @pinned_pages: Handle to pinned pages to be unpinned - * - * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new - * windows against pinned_pages. The physical pages represented by pinned_pages - * will remain pinned until all windows previously registered against - * pinned_pages are deleted (the window is scif_unregister()'d and all - * references to the window are removed (see scif_unregister()). - * - * pinned_pages must have been obtain from a previous call to scif_pin_pages(). - * After calling scif_unpin_pages(), it is an error to pass pinned_pages to - * scif_register_pinned_pages(). - * - * Return: - * Upon successful completion, scif_unpin_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * - * Errors: - * EINVAL - pinned_pages is not valid - */ -int scif_unpin_pages(scif_pinned_pages_t pinned_pages); - -/** - * scif_register_pinned_pages() - Mark a memory region for remote access. - * @epd: endpoint descriptor - * @pinned_pages: Handle to pinned pages - * @offset: Registered address space offset - * @map_flags: Flags which control where pages are mapped - * - * The scif_register_pinned_pages() function opens a window, a range of whole - * pages of the registered address space of the endpoint epd, starting at - * offset po. The value of po, further described below, is a function of the - * parameters offset and pinned_pages, and the value of map_flags. Each page of - * the window represents a corresponding physical memory page of the range - * represented by pinned_pages; the length of the window is the same as the - * length of range represented by pinned_pages. A successful - * scif_register_pinned_pages() call returns po as the return value. - * - * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset - * exactly, and offset is constrained to be a multiple of the page size. The - * mapping established by scif_register_pinned_pages() will not replace any - * existing registration; an error is returned if any page of the new window - * would intersect an existing window. - * - * When SCIF_MAP_FIXED is not set, the implementation uses offset in an - * implementation-defined manner to arrive at po. The po so chosen will be an - * area of the registered address space that the implementation deems suitable - * for a mapping of the required size. An offset value of 0 is interpreted as - * granting the implementation complete freedom in selecting po, subject to - * constraints described below. A non-zero value of offset is taken to be a - * suggestion of an offset near which the mapping should be placed. When the - * implementation selects a value for po, it does not replace any extant - * window. In all cases, po will be a multiple of the page size. - * - * The physical pages which are so represented by a window are available for - * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), - * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the - * physical pages represented by the window will not be reused by the memory - * subsystem for any other purpose. Note that the same physical page may be - * represented by multiple windows. - * - * Windows created by scif_register_pinned_pages() are unregistered by - * scif_unregister(). - * - * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a - * fixed offset. - * - * Return: - * Upon successful completion, scif_register_pinned_pages() returns the offset - * at which the mapping was placed (po); otherwise the negative of one of the - * following errors is returned. - * - * Errors: - * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window - * would intersect an existing window - * EAGAIN - The mapping could not be performed due to lack of resources - * ECONNRESET - Connection reset by peer - * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and - * offset is not a multiple of the page size, or offset is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN - The endpoint is not connected - */ -off_t scif_register_pinned_pages(scif_epd_t epd, - scif_pinned_pages_t pinned_pages, - off_t offset, int map_flags); - -/** - * scif_get_pages() - Add references to remote registered pages - * @epd: endpoint descriptor - * @offset: remote registered offset - * @len: length of range of pages - * @pages: returned scif_range structure - * - * scif_get_pages() returns the addresses of the physical pages represented by - * those pages of the registered address space of the peer of epd, starting at - * offset and continuing for len bytes. offset and len are constrained to be - * multiples of the page size. - * - * All of the pages in the specified range [offset, offset + len - 1] must be - * within a single window of the registered address space of the peer of epd. - * - * The addresses are returned as a virtually contiguous array pointed to by the - * phys_addr component of the scif_range structure whose address is returned in - * pages. The nr_pages component of scif_range is the length of the array. The - * prot_flags component of scif_range holds the protection flag value passed - * when the pages were registered. - * - * Each physical page whose address is returned by scif_get_pages() remains - * available and will not be released for reuse until the scif_range structure - * is returned in a call to scif_put_pages(). The scif_range structure returned - * by scif_get_pages() must be unmodified. - * - * It is an error to call scif_close() on an endpoint on which a scif_range - * structure of that endpoint has not been returned to scif_put_pages(). - * - * Return: - * Upon successful completion, scif_get_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * Errors: - * ECONNRESET - Connection reset by peer. - * EINVAL - offset is not a multiple of the page size, or offset is negative, or - * len is not a multiple of the page size - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid - * for the registered address space of the peer epd - */ -int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, - struct scif_range **pages); - -/** - * scif_put_pages() - Remove references from remote registered pages - * @pages: pages to be returned - * - * scif_put_pages() releases a scif_range structure previously obtained by - * calling scif_get_pages(). The physical pages represented by pages may - * be reused when the window which represented those pages is unregistered. - * Therefore, those pages must not be accessed after calling scif_put_pages(). - * - * Return: - * Upon successful completion, scif_put_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * Errors: - * EINVAL - pages does not point to a valid scif_range structure, or - * the scif_range structure pointed to by pages was already returned - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - */ -int scif_put_pages(struct scif_range *pages); - -/** - * scif_poll() - Wait for some event on an endpoint - * @epds: Array of endpoint descriptors - * @nepds: Length of epds - * @timeout: Upper limit on time for which scif_poll() will block - * - * scif_poll() waits for one of a set of endpoints to become ready to perform - * an I/O operation. - * - * The epds argument specifies the endpoint descriptors to be examined and the - * events of interest for each endpoint descriptor. epds is a pointer to an - * array with one member for each open endpoint descriptor of interest. - * - * The number of items in the epds array is specified in nepds. The epd field - * of scif_pollepd is an endpoint descriptor of an open endpoint. The field - * events is a bitmask specifying the events which the application is - * interested in. The field revents is an output parameter, filled by the - * kernel with the events that actually occurred. The bits returned in revents - * can include any of those specified in events, or one of the values EPOLLERR, - * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events - * field, and will be set in the revents field whenever the corresponding - * condition is true.) - * - * If none of the events requested (and no error) has occurred for any of the - * endpoint descriptors, then scif_poll() blocks until one of the events occurs. - * - * The timeout argument specifies an upper limit on the time for which - * scif_poll() will block, in milliseconds. Specifying a negative value in - * timeout means an infinite timeout. - * - * The following bits may be set in events and returned in revents. - * EPOLLIN - Data may be received without blocking. For a connected - * endpoint, this means that scif_recv() may be called without blocking. For a - * listening endpoint, this means that scif_accept() may be called without - * blocking. - * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this - * means that scif_send() may be called without blocking. EPOLLOUT may also be - * used to block waiting for a non-blocking connect to complete. This bit value - * has no meaning for a listening endpoint and is ignored if specified. - * - * The following bits are only returned in revents, and are ignored if set in - * events. - * EPOLLERR - An error occurred on the endpoint - * EPOLLHUP - The connection to the peer endpoint was disconnected - * EPOLLNVAL - The specified endpoint descriptor is invalid. - * - * Return: - * Upon successful completion, scif_poll() returns a non-negative value. A - * positive value indicates the total number of endpoint descriptors that have - * been selected (that is, endpoint descriptors for which the revents member is - * non-zero). A value of 0 indicates that the call timed out and no endpoint - * descriptors have been selected. Otherwise in user mode -1 is returned and - * errno is set to indicate the error; in kernel mode the negative of one of - * the following errors is returned. - * - * Errors: - * EINTR - A signal occurred before any requested event - * EINVAL - The nepds argument is greater than {OPEN_MAX} - * ENOMEM - There was no space to allocate file descriptor tables - */ -int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); - -/** - * scif_client_register() - Register a SCIF client - * @client: client to be registered - * - * scif_client_register() registers a SCIF client. The probe() method - * of the client is called when SCIF peer devices come online and the - * remove() method is called when the peer devices disappear. - * - * Return: - * Upon successful completion, scif_client_register() returns a non-negative - * value. Otherwise the return value is the same as subsys_interface_register() - * in the kernel. - */ -int scif_client_register(struct scif_client *client); - -/** - * scif_client_unregister() - Unregister a SCIF client - * @client: client to be unregistered - * - * scif_client_unregister() unregisters a SCIF client. - * - * Return: - * None - */ -void scif_client_unregister(struct scif_client *client); - -#endif /* __SCIF_H__ */ -- cgit v1.2.3 From 7cb415003468d41aecd6877ae088c38f6c0fc174 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 28 Oct 2020 06:56:47 -0400 Subject: Fonts: Make font size unsigned in font_desc `width` and `height` are defined as unsigned in our UAPI font descriptor `struct console_font`. Make them unsigned in our kernel font descriptor `struct font_desc`, too. Also, change the corresponding printk() format identifiers from `%d` to `%u`, in sti_select_fbfont(). Signed-off-by: Peilin Ye Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201028105647.1210161-1-yepeilin.cs@gmail.com --- include/linux/font.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/font.h b/include/linux/font.h index 4a3f8741bb7e..b7214d7881f0 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -16,7 +16,7 @@ struct font_desc { int idx; const char *name; - int width, height; + unsigned int width, height; const void *data; int pref; }; -- cgit v1.2.3 From 8073c1ac82c12aaf1b475a3ce5328d43b3eaa4ae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 Oct 2020 22:35:11 +0100 Subject: genirq/msi: Allow shadow declarations of msi_msg:: $member Architectures like x86 have their MSI messages in various bits of the data, address_lo and address_hi field. Composing or decomposing these messages with bitmasks and shifts is possible, but unreadable gunk. Allow architectures to provide an architecture specific representation for each member of msi_msg. Provide empty defaults for each and stick them into an union. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201024213535.443185-12-dwmw2@infradead.org --- include/linux/msi.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 6b584cc4757c..360a0a7e7341 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -4,11 +4,50 @@ #include #include +#include + +/* Dummy shadow structures if an architecture does not define them */ +#ifndef arch_msi_msg_addr_lo +typedef struct arch_msi_msg_addr_lo { + u32 address_lo; +} __attribute__ ((packed)) arch_msi_msg_addr_lo_t; +#endif + +#ifndef arch_msi_msg_addr_hi +typedef struct arch_msi_msg_addr_hi { + u32 address_hi; +} __attribute__ ((packed)) arch_msi_msg_addr_hi_t; +#endif + +#ifndef arch_msi_msg_data +typedef struct arch_msi_msg_data { + u32 data; +} __attribute__ ((packed)) arch_msi_msg_data_t; +#endif +/** + * msi_msg - Representation of a MSI message + * @address_lo: Low 32 bits of msi message address + * @arch_addrlo: Architecture specific shadow of @address_lo + * @address_hi: High 32 bits of msi message address + * (only used when device supports it) + * @arch_addrhi: Architecture specific shadow of @address_hi + * @data: MSI message data (usually 16 bits) + * @arch_data: Architecture specific shadow of @data + */ struct msi_msg { - u32 address_lo; /* low 32 bits of msi message address */ - u32 address_hi; /* high 32 bits of msi message address */ - u32 data; /* 16 bits of msi message data */ + union { + u32 address_lo; + arch_msi_msg_addr_lo_t arch_addr_lo; + }; + union { + u32 address_hi; + arch_msi_msg_addr_hi_t arch_addr_hi; + }; + union { + u32 data; + arch_msi_msg_data_t arch_data; + }; }; extern int pci_msi_ignore_mask; @@ -243,7 +282,6 @@ struct msi_controller { #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN #include -#include struct irq_domain; struct irq_domain_ops; -- cgit v1.2.3 From cf83b2d2e2b64920bd6999b199dfa271d7e94cf8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 27 Oct 2020 23:10:54 -0700 Subject: bpf: Permit cond_resched for some iterators Commit e679654a704e ("bpf: Fix a rcu_sched stall issue with bpf task/task_file iterator") tries to fix rcu stalls warning which is caused by bpf task_file iterator when running "bpftool prog". rcu: INFO: rcu_sched self-detected stall on CPU rcu: \x097-....: (20999 ticks this GP) idle=302/1/0x4000000000000000 softirq=1508852/1508852 fqs=4913 \x09(t=21031 jiffies g=2534773 q=179750) NMI backtrace for cpu 7 CPU: 7 PID: 184195 Comm: bpftool Kdump: loaded Tainted: G W 5.8.0-00004-g68bfc7f8c1b4 #6 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A17 05/03/2019 Call Trace: dump_stack+0x57/0x70 nmi_cpu_backtrace.cold+0x14/0x53 ? lapic_can_unplug_cpu.cold+0x39/0x39 nmi_trigger_cpumask_backtrace+0xb7/0xc7 rcu_dump_cpu_stacks+0xa2/0xd0 rcu_sched_clock_irq.cold+0x1ff/0x3d9 ? tick_nohz_handler+0x100/0x100 update_process_times+0x5b/0x90 tick_sched_timer+0x5e/0xf0 __hrtimer_run_queues+0x12a/0x2a0 hrtimer_interrupt+0x10e/0x280 __sysvec_apic_timer_interrupt+0x51/0xe0 asm_call_on_stack+0xf/0x20 sysvec_apic_timer_interrupt+0x6f/0x80 ... task_file_seq_next+0x52/0xa0 bpf_seq_read+0xb9/0x320 vfs_read+0x9d/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x38/0x60 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The fix is to limit the number of bpf program runs to be one million. This fixed the program in most cases. But we also found under heavy load, which can increase the wallclock time for bpf_seq_read(), the warning may still be possible. For example, calling bpf_delay() in the "while" loop of bpf_seq_read(), which will introduce artificial delay, the warning will show up in my qemu run. static unsigned q; volatile unsigned *p = &q; volatile unsigned long long ll; static void bpf_delay(void) { int i, j; for (i = 0; i < 10000; i++) for (j = 0; j < 10000; j++) ll += *p; } There are two ways to fix this issue. One is to reduce the above one million threshold to say 100,000 and hopefully rcu warning will not show up any more. Another is to introduce a target feature which enables bpf_seq_read() calling cond_resched(). This patch took second approach as the first approach may cause more -EAGAIN failures for read() syscalls. Note that not all bpf_iter targets can permit cond_resched() in bpf_seq_read() as some, e.g., netlink seq iterator, rcu read lock critical section spans through seq_ops->next() -> seq_ops->show() -> seq_ops->next(). For the kernel code with the above hack, "bpftool p" roughly takes 38 seconds to finish on my VM with 184 bpf program runs. Using the following command, I am able to collect the number of context switches: perf stat -e context-switches -- ./bpftool p >& log Without this patch, 69 context-switches With this patch, 75 context-switches This patch added additional 6 context switches, roughly every 6 seconds to reschedule, to avoid lengthy no-rescheduling which may cause the above RCU warnings. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201028061054.1411116-1-yhs@fb.com --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b16bf48aab6..2fffd30e13ac 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1294,6 +1294,10 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +enum bpf_iter_feature { + BPF_ITER_RESCHED = BIT(0), +}; + #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; @@ -1302,6 +1306,7 @@ struct bpf_iter_reg { bpf_iter_show_fdinfo_t show_fdinfo; bpf_iter_fill_link_info_t fill_link_info; u32 ctx_arg_info_size; + u32 feature; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; -- cgit v1.2.3 From 5c251e9dc0e127bac6fc5b8e6696363d2e35f515 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:27 -0600 Subject: signal: Add task_sigpending() helper This is in preparation for maintaining signal_pending() as the decider of whether or not a schedule() loop should be broken, or continue sleeping. This is different than the core signal use cases, which really need to know whether an actual signal is pending or not. task_sigpending() returns non-zero if TIF_SIGPENDING is set. Only core kernel use cases should care about the distinction between the two, make sure those use the task_sigpending() helper. Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-2-axboe@kernel.dk --- include/linux/sched/signal.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba..404145dc536e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -353,11 +353,16 @@ static inline int restart_syscall(void) return -ERESTARTNOINTR; } -static inline int signal_pending(struct task_struct *p) +static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } +static inline int signal_pending(struct task_struct *p) +{ + return task_sigpending(p); +} + static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); @@ -365,7 +370,7 @@ static inline int __fatal_signal_pending(struct task_struct *p) static inline int fatal_signal_pending(struct task_struct *p) { - return signal_pending(p) && __fatal_signal_pending(p); + return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(long state, struct task_struct *p) -- cgit v1.2.3 From 12db8b690010ccfadf9d0b49a1e1798e47dbbe1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Oct 2020 14:32:28 -0600 Subject: entry: Add support for TIF_NOTIFY_SIGNAL Add TIF_NOTIFY_SIGNAL handling in the generic entry code, which if set, will return true if signal_pending() is used in a wait loop. That causes an exit of the loop so that notify_signal tracehooks can be run. If the wait loop is currently inside a system call, the system call is restarted once task_work has been processed. In preparation for only having arch_do_signal() handle syscall restarts if _TIF_SIGPENDING isn't set, rename it to arch_do_signal_or_restart(). Pass in a boolean that tells the architecture specific signal handler if it should attempt to get a signal, or just process a potential syscall restart. For !CONFIG_GENERIC_ENTRY archs, add the TIF_NOTIFY_SIGNAL handling to get_signal(). This is done to minimize the needed architecture changes to support this feature. Signed-off-by: Jens Axboe Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20201026203230.386348-3-axboe@kernel.dk --- include/linux/entry-common.h | 11 ++++++++--- include/linux/entry-kvm.h | 4 ++-- include/linux/sched/signal.h | 11 ++++++++++- include/linux/tracehook.h | 27 +++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..c7bfac45f951 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -37,6 +37,10 @@ # define _TIF_UPROBE (0) #endif +#ifndef _TIF_NOTIFY_SIGNAL +# define _TIF_NOTIFY_SIGNAL (0) +#endif + /* * TIF flags handled in syscall_enter_from_usermode() */ @@ -69,7 +73,7 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ + _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** @@ -226,12 +230,13 @@ static __always_inline void arch_exit_to_user_mode(void) { } #endif /** - * arch_do_signal - Architecture specific signal delivery function + * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs + * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). */ -void arch_do_signal(struct pt_regs *regs); +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); /** * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 0cef17afb41a..9b93f8584ff7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -11,8 +11,8 @@ # define ARCH_XFER_TO_GUEST_MODE_WORK (0) #endif -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 404145dc536e..bd5afa076189 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -360,6 +360,15 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { +#if defined(TIF_NOTIFY_SIGNAL) + /* + * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same + * behavior in terms of ensuring that we break out of wait loops + * so that notify signal callbacks can be processed. + */ + if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) + return 1; +#endif return task_sigpending(p); } @@ -507,7 +516,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 36fb3bbed6b2..1e8caca92e1f 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -198,4 +198,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) blkcg_maybe_throttle_current(); } +/* + * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This + * is currently used by TWA_SIGNAL based task_work, which requires breaking + * wait loops to ensure that task_work is noticed and run. + */ +static inline void tracehook_notify_signal(void) +{ +#if defined(TIF_NOTIFY_SIGNAL) + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); + if (current->task_works) + task_work_run(); +#endif +} + +/* + * Called when we have work to process from exit_to_user_mode_loop() + */ +static inline void set_notify_signal(struct task_struct *task) +{ +#if defined(TIF_NOTIFY_SIGNAL) + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && + !wake_up_state(task, TASK_INTERRUPTIBLE)) + kick_process(task); +#endif +} + #endif /* */ -- cgit v1.2.3 From 5bc78502322a5e4eef3f1b2a2813751dc6434143 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 20 Oct 2020 09:47:13 -0400 Subject: sched: fix exit_mm vs membarrier (v4) exit_mm should issue memory barriers after user-space memory accesses, before clearing current->mm, to order user-space memory accesses performed prior to exit_mm before clearing tsk->mm, which has the effect of skipping the membarrier private expedited IPIs. exit_mm should also update the runqueue's membarrier_state so membarrier global expedited IPIs are not sent when they are not needed. The membarrier system call can be issued concurrently with do_exit if we have thread groups created with CLONE_VM but not CLONE_THREAD. Here is the scenario I have in mind: Two thread groups are created, A and B. Thread group B is created by issuing clone from group A with flag CLONE_VM set, but not CLONE_THREAD. Let's assume we have a single thread within each thread group (Thread A and Thread B). The AFAIU we can have: Userspace variables: int x = 0, y = 0; CPU 0 CPU 1 Thread A Thread B (in thread group A) (in thread group B) x = 1 barrier() y = 1 exit() exit_mm() current->mm = NULL; r1 = load y membarrier() skips CPU 0 (no IPI) because its current mm is NULL r2 = load x BUG_ON(r1 == 1 && r2 == 0) Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201020134715.13909-2-mathieu.desnoyers@efficios.com --- include/linux/sched/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d5ece7a9a403..a91fb3ad9ec7 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -347,6 +347,8 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) extern void membarrier_exec_mmap(struct mm_struct *mm); +extern void membarrier_update_current_mm(struct mm_struct *next_mm); + #else #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS static inline void membarrier_arch_switch_mm(struct mm_struct *prev, @@ -361,6 +363,9 @@ static inline void membarrier_exec_mmap(struct mm_struct *mm) static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { } +static inline void membarrier_update_current_mm(struct mm_struct *next_mm) +{ +} #endif #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 8d97e71811aaafe4abf611dc24822fd6e73df1a1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Oct 2020 06:57:46 -0700 Subject: perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE Current perf can report both virtual addresses and physical addresses, but not the MMU page size. Without the MMU page size information of the utilized page, users cannot decide whether to promote/demote large pages to optimize memory usage. Add a new sample type for the data MMU page size. Current perf already has a facility to collect data virtual addresses. A page walker is required to walk the pages tables and calculate the MMU page size from a given virtual address. On some platforms, e.g., X86, the page walker is invoked in an NMI handler. So the page walker must be NMI-safe and low overhead. Besides, the page walker should work for both user and kernel virtual address. The existing generic page walker, e.g., walk_page_range_novma(), is a little bit complex and doesn't guarantee the NMI-safe. The follow_page() is only for user-virtual address. Add a new function perf_get_page_size() to walk the page tables and calculate the MMU page size. In the function: - Interrupts have to be disabled to prevent any teardown of the page tables. - For user space threads, the current->mm is used for the page walker. For kernel threads and the like, the current->mm is NULL. The init_mm is used for the page walker. The active_mm is not used here, because it can be NULL. Quote from Peter Zijlstra, "context_switch() can set prev->active_mm to NULL when it transfers it to @next. It does this before @current is updated. So an NMI that comes in between this active_mm swizzling and updating @current will see !active_mm." - The MMU page size is calculated from the page table level. The method should work for all architectures, but it has only been verified on X86. Should there be some architectures, which support perf, where the method doesn't work, it can be fixed later separately. Reporting the wrong page size would not be fatal for the architecture. Some under discussion features may impact the method in the future. Quote from Dave Hansen, "There are lots of weird things folks are trying to do with the page tables, like Address Space Isolation. For instance, if you get a perf NMI when running userspace, current->mm->pgd is *different* than the PGD that was in use when userspace was running. It's close enough today, but it might not stay that way." If the case happens later, lots of consecutive page walk errors will happen. The worst case is that lots of page-size '0' are returned, which would not be fatal. In the perf tool, a check is implemented to detect this case. Once it happens, a kernel patch could be implemented accordingly then. Suggested-by: Peter Zijlstra Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201001135749.2804-2-kan.liang@linux.intel.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..7e3785dd27d9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1034,6 +1034,7 @@ struct perf_sample_data { u64 phys_addr; u64 cgroup; + u64 data_page_size; } ____cacheline_aligned; /* default value for data source */ -- cgit v1.2.3 From 995f088efebe1eba0282a6ffa12411b37f8990c2 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 1 Oct 2020 06:57:49 -0700 Subject: perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE When studying code layout, it is useful to capture the page size of the sampled code address. Add a new sample type for code page size. The new sample type requires collecting the ip. The code page size can be calculated from the NMI-safe perf_get_page_size(). For large PEBS, it's very unlikely that the mapping is gone for the earlier PEBS records. Enable the feature for the large PEBS. The worst case is that page-size '0' is returned. Signed-off-by: Kan Liang Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201001135749.2804-5-kan.liang@linux.intel.com --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7e3785dd27d9..e533b03af053 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1035,6 +1035,7 @@ struct perf_sample_data { u64 phys_addr; u64 cgroup; u64 data_page_size; + u64 code_page_size; } ____cacheline_aligned; /* default value for data source */ -- cgit v1.2.3 From 51b646b2d9f84d6ff6300e3c1d09f2be4329a424 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2020 11:09:27 +0200 Subject: perf,mm: Handle non-page-table-aligned hugetlbfs A limited nunmber of architectures support hugetlbfs sizes that do not align with the page-tables (ARM64, Power, Sparc64). Add support for this to the generic perf_get_page_size() implementation, and also allow an architecture to override this implementation. This latter is only needed when it uses non-page-table aligned huge pages in its kernel map. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e533b03af053..0defb526cd0c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1590,4 +1590,8 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now); +#ifdef CONFIG_MMU +extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); +#endif + #endif /* _LINUX_PERF_EVENT_H */ -- cgit v1.2.3 From a62f68f5ca53ab61cba2f0a410d0add7a6d54a52 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:46 +0200 Subject: cpufreq: Introduce cpufreq_driver_test_flags() Add a helper function to test the flags of the cpufreq driver in use againt a given flags mask. In particular, this will be needed to test the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag in the schedutil governor. Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 038ed83aab41..1eaa04f1bae6 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -433,6 +433,7 @@ struct cpufreq_driver { int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); +bool cpufreq_driver_test_flags(u16 flags); const char *cpufreq_get_current_driver(void); void *cpufreq_get_driver_data(void); -- cgit v1.2.3 From c6838eeef2fbc7e3e1f83759aa016ae6b70c643e Mon Sep 17 00:00:00 2001 From: "dmitry.torokhov@gmail.com" Date: Wed, 30 Sep 2020 15:47:13 -0700 Subject: HID: hid-input: occasionally report stylus battery even if not changed There are styluses that only report their battery status when they are touching the touchscreen; additionally we currently suppress battery reports if capacity has not changed. To help userspace recognize how long ago the device reported battery status, let's send the change event through if either capacity has changed, or at least 30 seconds have passed since last report we've let through. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 58684657960b..b079146850e6 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -585,6 +585,7 @@ struct hid_device { /* device report descriptor */ __s32 battery_report_id; enum hid_battery_status battery_status; bool battery_avoid_query; + ktime_t battery_ratelimit_time; #endif unsigned long status; /* see STAT flags above */ -- cgit v1.2.3 From 4169e889e5889405d54cec27d6e9f7f0ce3c7096 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 2 Sep 2020 23:25:55 -0500 Subject: include: jhash/signal: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, explicitly add break statements instead of letting the code fall through to the next case. This patch adds four break statements that, together, fix almost 40,000 warnings when building Linux 5.10-rc1 with Clang 12.0.0 and this[1] change reverted. Notice that in order to enable -Wimplicit-fallthrough for Clang, such change[1] is meant to be reverted at some point. So, this patch helps to move in that direction. Something important to mention is that there is currently a discrepancy between GCC and Clang when dealing with switch fall-through to empty case statements or to cases that only contain a break/continue/return statement[2][3][4]. Now that the -Wimplicit-fallthrough option has been globally enabled[5], any compiler should really warn on missing either a fallthrough annotation or any of the other case-terminating statements (break/continue/return/ goto) when falling through to the next case statement. Making exceptions to this introduces variation in case handling which may continue to lead to bugs, misunderstandings, and a general lack of robustness. The point of enabling options like -Wimplicit-fallthrough is to prevent human error and aid developers in spotting bugs before their code is even built/ submitted/committed, therefore eliminating classes of bugs. So, in order to really accomplish this, we should, and can, move in the direction of addressing any error-prone scenarios and get rid of the unintentional fallthrough bug-class in the kernel, entirely, even if there is some minor redundancy. Better to have explicit case-ending statements than continue to have exceptions where one must guess as to the right result. The compiler will eliminate any actual redundancy. [1] commit e2079e93f562c ("kbuild: Do not enable -Wimplicit-fallthrough for clang for now") [2] https://github.com/ClangBuiltLinux/linux/issues/636 [3] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91432 [4] https://godbolt.org/z/xgkvIh [5] commit a035d552a93b ("Makefile: Globally enable fall-through warning") Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- include/linux/jhash.h | 2 ++ include/linux/signal.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index cfb62e9f37be..ab7f8c152b89 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -99,6 +99,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) case 2: a += (u32)k[1]<<8; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); + break; case 0: /* Nothing left to add */ break; } @@ -136,6 +137,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) case 2: b += k[1]; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); + break; case 0: /* Nothing left to add */ break; } diff --git a/include/linux/signal.h b/include/linux/signal.h index 7bbc0e9cf084..b256f9c65661 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -238,6 +238,7 @@ static inline void siginitset(sigset_t *set, unsigned long mask) memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = 0; + break; case 1: ; } } @@ -250,6 +251,7 @@ static inline void siginitsetinv(sigset_t *set, unsigned long mask) memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = -1; + break; case 1: ; } } -- cgit v1.2.3 From f54ec58fee837ec847cb8b50593e81bfaa46107f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Oct 2020 21:12:12 +0100 Subject: wimax: move out to staging There are no known users of this driver as of October 2020, and it will be removed unless someone turns out to still need it in future releases. According to https://en.wikipedia.org/wiki/List_of_WiMAX_networks, there have been many public wimax networks, but it appears that many of these have migrated to LTE or discontinued their service altogether. As most PCs and phones lack WiMAX hardware support, the remaining networks tend to use standalone routers. These almost certainly run Linux, but not a modern kernel or the mainline wimax driver stack. NetworkManager appears to have dropped userspace support in 2015 https://bugzilla.gnome.org/show_bug.cgi?id=747846, the www.linuxwimax.org site had already shut down earlier. WiMax is apparently still being deployed on airport campus networks ("AeroMACS"), but in a frequency band that was not supported by the old Intel 2400m (used in Sandy Bridge laptops and earlier), which is the only driver using the kernel's wimax stack. Move all files into drivers/staging/wimax, including the uapi header files and documentation, to make it easier to remove it when it gets to that. Only minimal changes are made to the source files, in order to make it possible to port patches across the move. Also remove the MAINTAINERS entry that refers to a broken mailing list and website. Acked-by: Jakub Kicinski Acked-by: Greg Kroah-Hartman Acked-By: Inaky Perez-Gonzalez Acked-by: Johannes Berg Suggested-by: Inaky Perez-Gonzalez Signed-off-by: Arnd Bergmann --- include/linux/wimax/debug.h | 491 -------------------------------------------- 1 file changed, 491 deletions(-) delete mode 100644 include/linux/wimax/debug.h (limited to 'include/linux') diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h deleted file mode 100644 index cdae052bcdcd..000000000000 --- a/include/linux/wimax/debug.h +++ /dev/null @@ -1,491 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Linux WiMAX - * Collection of tools to manage debug operations. - * - * Copyright (C) 2005-2007 Intel Corporation - * Inaky Perez-Gonzalez - * - * Don't #include this file directly, read on! - * - * EXECUTING DEBUGGING ACTIONS OR NOT - * - * The main thing this framework provides is decission power to take a - * debug action (like printing a message) if the current debug level - * allows it. - * - * The decission power is at two levels: at compile-time (what does - * not make it is compiled out) and at run-time. The run-time - * selection is done per-submodule (as they are declared by the user - * of the framework). - * - * A call to d_test(L) (L being the target debug level) returns true - * if the action should be taken because the current debug levels - * allow it (both compile and run time). - * - * It follows that a call to d_test() that can be determined to be - * always false at compile time will get the code depending on it - * compiled out by optimization. - * - * DEBUG LEVELS - * - * It is up to the caller to define how much a debugging level is. - * - * Convention sets 0 as "no debug" (so an action marked as debug level 0 - * will always be taken). The increasing debug levels are used for - * increased verbosity. - * - * USAGE - * - * Group the code in modules and submodules inside each module [which - * in most cases maps to Linux modules and .c files that compose - * those]. - * - * For each module, there is: - * - * - a MODULENAME (single word, legal C identifier) - * - * - a debug-levels.h header file that declares the list of - * submodules and that is included by all .c files that use - * the debugging tools. The file name can be anything. - * - * - some (optional) .c code to manipulate the runtime debug levels - * through debugfs. - * - * The debug-levels.h file would look like: - * - * #ifndef __debug_levels__h__ - * #define __debug_levels__h__ - * - * #define D_MODULENAME modulename - * #define D_MASTER 10 - * - * #include - * - * enum d_module { - * D_SUBMODULE_DECLARE(submodule_1), - * D_SUBMODULE_DECLARE(submodule_2), - * ... - * D_SUBMODULE_DECLARE(submodule_N) - * }; - * - * #endif - * - * D_MASTER is the maximum compile-time debug level; any debug actions - * above this will be out. D_MODULENAME is the module name (legal C - * identifier), which has to be unique for each module (to avoid - * namespace collisions during linkage). Note those #defines need to - * be done before #including debug.h - * - * We declare N different submodules whose debug level can be - * independently controlled during runtime. - * - * In a .c file of the module (and only in one of them), define the - * following code: - * - * struct d_level D_LEVEL[] = { - * D_SUBMODULE_DEFINE(submodule_1), - * D_SUBMODULE_DEFINE(submodule_2), - * ... - * D_SUBMODULE_DEFINE(submodule_N), - * }; - * size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); - * - * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used - * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros - * #defined also in this file. - * - * To manipulate from user space the levels, create a debugfs dentry - * and then register each submodule with: - * - * d_level_register_debugfs("PREFIX_", submodule_X, parent); - * - * Where PREFIX_ is a name of your chosing. This will create debugfs - * file with a single numeric value that can be use to tweak it. To - * remove the entires, just use debugfs_remove_recursive() on 'parent'. - * - * NOTE: remember that even if this will show attached to some - * particular instance of a device, the settings are *global*. - * - * On each submodule (for example, .c files), the debug infrastructure - * should be included like this: - * - * #define D_SUBMODULE submodule_x // matches one in debug-levels.h - * #include "debug-levels.h" - * - * after #including all your include files. - * - * Now you can use the d_*() macros below [d_test(), d_fnstart(), - * d_fnend(), d_printf(), d_dump()]. - * - * If their debug level is greater than D_MASTER, they will be - * compiled out. - * - * If their debug level is lower or equal than D_MASTER but greater - * than the current debug level of their submodule, they'll be - * ignored. - * - * Otherwise, the action will be performed. - */ -#ifndef __debug__h__ -#define __debug__h__ - -#include -#include - -struct device; - -/* Backend stuff */ - -/* - * Debug backend: generate a message header from a 'struct device' - * - * @head: buffer where to place the header - * @head_size: length of @head - * @dev: pointer to device used to generate a header from. If NULL, - * an empty ("") header is generated. - */ -static inline -void __d_head(char *head, size_t head_size, - struct device *dev) -{ - if (dev == NULL) - head[0] = 0; - else if ((unsigned long)dev < 4096) { - printk(KERN_ERR "E: Corrupt dev %p\n", dev); - WARN_ON(1); - } else - snprintf(head, head_size, "%s %s: ", - dev_driver_string(dev), dev_name(dev)); -} - - -/* - * Debug backend: log some message if debugging is enabled - * - * @l: intended debug level - * @tag: tag to prefix the message with - * @dev: 'struct device' associated to this message - * @f: printf-like format and arguments - * - * Note this is optimized out if it doesn't pass the compile-time - * check; however, it is *always* compiled. This is useful to make - * sure the printf-like formats and variables are always checked and - * they don't get bit rot if you have all the debugging disabled. - */ -#define _d_printf(l, tag, dev, f, a...) \ -do { \ - char head[64]; \ - if (!d_test(l)) \ - break; \ - __d_head(head, sizeof(head), dev); \ - printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a); \ -} while (0) - - -/* - * CPP syntactic sugar to generate A_B like symbol names when one of - * the arguments is a preprocessor #define. - */ -#define __D_PASTE__(varname, modulename) varname##_##modulename -#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename)) -#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name)) - - -/* - * Store a submodule's runtime debug level and name - */ -struct d_level { - u8 level; - const char *name; -}; - - -/* - * List of available submodules and their debug levels - * - * We call them d_level_MODULENAME and d_level_size_MODULENAME; the - * macros D_LEVEL and D_LEVEL_SIZE contain the name already for - * convenience. - * - * This array and the size are defined on some .c file that is part of - * the current module. - */ -#define D_LEVEL __D_PASTE(d_level, D_MODULENAME) -#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME) - -extern struct d_level D_LEVEL[]; -extern size_t D_LEVEL_SIZE; - - -/* - * Frontend stuff - * - * - * Stuff you need to declare prior to using the actual "debug" actions - * (defined below). - */ - -#ifndef D_MODULENAME -#error D_MODULENAME is not defined in your debug-levels.h file -/** - * D_MODULE - Name of the current module - * - * #define in your module's debug-levels.h, making sure it is - * unique. This has to be a legal C identifier. - */ -#define D_MODULENAME undefined_modulename -#endif - - -#ifndef D_MASTER -#warning D_MASTER not defined, but debug.h included! [see docs] -/** - * D_MASTER - Compile time maximum debug level - * - * #define in your debug-levels.h file to the maximum debug level the - * runtime code will be allowed to have. This allows you to provide a - * main knob. - * - * Anything above that level will be optimized out of the compile. - * - * Defaults to zero (no debug code compiled in). - * - * Maximum one definition per module (at the debug-levels.h file). - */ -#define D_MASTER 0 -#endif - -#ifndef D_SUBMODULE -#error D_SUBMODULE not defined, but debug.h included! [see docs] -/** - * D_SUBMODULE - Name of the current submodule - * - * #define in your submodule .c file before #including debug-levels.h - * to the name of the current submodule as previously declared and - * defined with D_SUBMODULE_DECLARE() (in your module's - * debug-levels.h) and D_SUBMODULE_DEFINE(). - * - * This is used to provide runtime-control over the debug levels. - * - * Maximum one per .c file! Can be shared among different .c files - * (meaning they belong to the same submodule categorization). - */ -#define D_SUBMODULE undefined_module -#endif - - -/** - * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control - * - * @_name: name of the submodule, restricted to the chars that make up a - * valid C identifier ([a-zA-Z0-9_]). - * - * Declare in the module's debug-levels.h header file as: - * - * enum d_module { - * D_SUBMODULE_DECLARE(submodule_1), - * D_SUBMODULE_DECLARE(submodule_2), - * D_SUBMODULE_DECLARE(submodule_3), - * }; - * - * Some corresponding .c file needs to have a matching - * D_SUBMODULE_DEFINE(). - */ -#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name - - -/** - * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control - * - * @_name: name of the submodule, restricted to the chars that make up a - * valid C identifier ([a-zA-Z0-9_]). - * - * Use once per module (in some .c file) as: - * - * static - * struct d_level d_level_SUBMODULENAME[] = { - * D_SUBMODULE_DEFINE(submodule_1), - * D_SUBMODULE_DEFINE(submodule_2), - * D_SUBMODULE_DEFINE(submodule_3), - * }; - * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME); - * - * Matching D_SUBMODULE_DECLARE()s have to be present in a - * debug-levels.h header file. - */ -#define D_SUBMODULE_DEFINE(_name) \ -[__D_SUBMODULE_##_name] = { \ - .level = 0, \ - .name = #_name \ -} - - - -/* The actual "debug" operations */ - - -/** - * d_test - Returns true if debugging should be enabled - * - * @l: intended debug level (unsigned) - * - * If the master debug switch is enabled and the current settings are - * higher or equal to the requested level, then debugging - * output/actions should be enabled. - * - * NOTE: - * - * This needs to be coded so that it can be evaluated in compile - * time; this is why the ugly BUG_ON() is placed in there, so the - * D_MASTER evaluation compiles all out if it is compile-time false. - */ -#define d_test(l) \ -({ \ - unsigned __l = l; /* type enforcer */ \ - (D_MASTER) >= __l \ - && ({ \ - BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\ - D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l; \ - }); \ -}) - - -/** - * d_fnstart - log message at function start if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a) - - -/** - * d_fnend - log message at function end if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a) - - -/** - * d_printf - log message if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a) - - -/** - * d_dump - log buffer hex dump if debugging enabled - * - * @l: intended debug level - * @_dev: 'struct device' pointer, NULL if none (for context) - * @f: printf-like format and arguments - */ -#define d_dump(l, dev, ptr, size) \ -do { \ - char head[64]; \ - if (!d_test(l)) \ - break; \ - __d_head(head, sizeof(head), dev); \ - print_hex_dump(KERN_ERR, head, 0, 16, 1, \ - ((void *) ptr), (size), 0); \ -} while (0) - - -/** - * Export a submodule's debug level over debugfs as PREFIXSUBMODULE - * - * @prefix: string to prefix the name with - * @submodule: name of submodule (not a string, just the name) - * @dentry: debugfs parent dentry - * - * For removing, just use debugfs_remove_recursive() on the parent. - */ -#define d_level_register_debugfs(prefix, name, parent) \ -({ \ - debugfs_create_u8( \ - prefix #name, 0600, parent, \ - &(D_LEVEL[__D_SUBMODULE_ ## name].level)); \ -}) - - -static inline -void d_submodule_set(struct d_level *d_level, size_t d_level_size, - const char *submodule, u8 level, const char *tag) -{ - struct d_level *itr, *top; - int index = -1; - - for (itr = d_level, top = itr + d_level_size; itr < top; itr++) { - index++; - if (itr->name == NULL) { - printk(KERN_ERR "%s: itr->name NULL?? (%p, #%d)\n", - tag, itr, index); - continue; - } - if (!strcmp(itr->name, submodule)) { - itr->level = level; - return; - } - } - printk(KERN_ERR "%s: unknown submodule %s\n", tag, submodule); -} - - -/** - * d_parse_params - Parse a string with debug parameters from the - * command line - * - * @d_level: level structure (D_LEVEL) - * @d_level_size: number of items in the level structure - * (D_LEVEL_SIZE). - * @_params: string with the parameters; this is a space (not tab!) - * separated list of NAME:VALUE, where value is the debug level - * and NAME is the name of the submodule. - * @tag: string for error messages (example: MODULE.ARGNAME). - */ -static inline -void d_parse_params(struct d_level *d_level, size_t d_level_size, - const char *_params, const char *tag) -{ - char submodule[130], *params, *params_orig, *token, *colon; - unsigned level, tokens; - - if (_params == NULL) - return; - params_orig = kstrdup(_params, GFP_KERNEL); - params = params_orig; - while (1) { - token = strsep(¶ms, " "); - if (token == NULL) - break; - if (*token == '\0') /* eat joint spaces */ - continue; - /* kernel's sscanf %s eats until whitespace, so we - * replace : by \n so it doesn't get eaten later by - * strsep */ - colon = strchr(token, ':'); - if (colon != NULL) - *colon = '\n'; - tokens = sscanf(token, "%s\n%u", submodule, &level); - if (colon != NULL) - *colon = ':'; /* set back, for error messages */ - if (tokens == 2) - d_submodule_set(d_level, d_level_size, - submodule, level, tag); - else - printk(KERN_ERR "%s: can't parse '%s' as a " - "SUBMODULE:LEVEL (%d tokens)\n", - tag, token, tokens); - } - kfree(params_orig); -} - -#endif /* #ifndef __debug__h__ */ -- cgit v1.2.3 From a4147d855f50a676ebe61833a681f7c71945f343 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:18:04 -0500 Subject: dmaengine: ti-cppi5: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/dma/ti-cppi5.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index 5896441ee604..efa2f0309f00 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -47,7 +47,7 @@ struct cppi5_host_desc_t { u32 buf_info1; u32 org_buf_len; u64 org_buf_ptr; - u32 epib[0]; + u32 epib[]; } __packed; #define CPPI5_DESC_MIN_ALIGN (16U) @@ -139,7 +139,7 @@ struct cppi5_desc_epib_t { */ struct cppi5_monolithic_desc_t { struct cppi5_desc_hdr_t hdr; - u32 epib[0]; + u32 epib[]; }; #define CPPI5_INFO2_MDESC_DATA_OFFSET_SHIFT (18U) -- cgit v1.2.3 From 277ffd6c1ec0aa60856a03e18455fcca7d2a1186 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:19:18 -0500 Subject: mailbox: zynqmp-ipi-message: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/mailbox/zynqmp-ipi-message.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mailbox/zynqmp-ipi-message.h b/include/linux/mailbox/zynqmp-ipi-message.h index 9542b41eacfd..35ce84c8ca02 100644 --- a/include/linux/mailbox/zynqmp-ipi-message.h +++ b/include/linux/mailbox/zynqmp-ipi-message.h @@ -14,7 +14,7 @@ */ struct zynqmp_ipi_message { size_t len; - u8 data[0]; + u8 data[]; }; #endif /* _LINUX_ZYNQMP_IPI_MESSAGE_H_ */ -- cgit v1.2.3 From 883541051567a62add043a9f4ca5a31f2970bffd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:21:14 -0500 Subject: platform/chrome: cros_ec_commands: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/platform_data/cros_ec_commands.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 1fcfe9e63cb9..a3a9a878415f 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1419,7 +1419,7 @@ struct ec_response_flash_info_2 { uint16_t num_banks_total; /* Number of banks described in banks array. */ uint16_t num_banks_desc; - struct ec_flash_bank banks[0]; + struct ec_flash_bank banks[]; } __ec_align4; /* @@ -2420,12 +2420,12 @@ struct ec_response_motion_sense_fifo_info { /* Total amount of vector lost */ uint16_t total_lost; /* Lost events since the last fifo_info, per sensors */ - uint16_t lost[0]; + uint16_t lost[]; } __ec_todo_packed; struct ec_response_motion_sense_fifo_data { uint32_t number_data; - struct ec_response_motion_sensor_data data[0]; + struct ec_response_motion_sensor_data data[]; } __ec_todo_packed; /* List supported activity recognition */ @@ -3093,7 +3093,7 @@ struct ec_response_tmp006_get_calibration_v1 { uint8_t algorithm; uint8_t num_params; uint8_t reserved[2]; - float val[0]; + float val[]; } __ec_align4; struct ec_params_tmp006_set_calibration_v1 { @@ -3101,7 +3101,7 @@ struct ec_params_tmp006_set_calibration_v1 { uint8_t algorithm; uint8_t num_params; uint8_t reserved; - float val[0]; + float val[]; } __ec_align4; @@ -5076,7 +5076,7 @@ struct ec_response_pd_log { uint8_t type; /* event type : see PD_EVENT_xx below */ uint8_t size_port; /* [7:5] port number [4:0] payload size in bytes */ uint16_t data; /* type-defined data payload */ - uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */ + uint8_t payload[]; /* optional additional data payload: 0..16 bytes */ } __ec_align4; /* The timestamp is the microsecond counter shifted to get about a ms. */ @@ -5789,7 +5789,7 @@ struct ec_response_fp_encryption_status { struct ec_response_tp_frame_info { uint32_t n_frames; - uint32_t frame_sizes[0]; + uint32_t frame_sizes[]; } __ec_align4; /* Create a snapshot of current frame readings */ -- cgit v1.2.3 From 120088832042e6dc9866160ff267f8c347bf53e6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:21:55 -0500 Subject: platform/chrome: cros_ec_proto: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/platform_data/cros_ec_proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4a415ae851ef..02599687770c 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -69,7 +69,7 @@ struct cros_ec_command { uint32_t outsize; uint32_t insize; uint32_t result; - uint8_t data[0]; + uint8_t data[]; }; /** -- cgit v1.2.3 From 5e01fdff04b7f7c3b8d456c11c8a9f978b4ddf65 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 08:25:42 -0500 Subject: fs: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..21cc971fd960 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3285,7 +3285,7 @@ static inline ino_t parent_ino(struct dentry *dentry) */ struct simple_transaction_argresp { ssize_t size; - char data[0]; + char data[]; }; #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) -- cgit v1.2.3 From 080b6f40763565f65ebb9540219c71ce885cf568 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 18:15:05 +0100 Subject: bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE Commit 3193c0836 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") introduced a __no_fgcse macro that expands to a function scope __attribute__((optimize("-fno-gcse"))), to disable a GCC specific optimization that was causing trouble on x86 builds, and was not expected to have any positive effect in the first place. However, as the GCC manual documents, __attribute__((optimize)) is not for production use, and results in all other optimization options to be forgotten for the function in question. This can cause all kinds of trouble, but in one particular reported case, it causes -fno-asynchronous-unwind-tables to be disregarded, resulting in .eh_frame info to be emitted for the function. This reverts commit 3193c0836, and instead, it disables the -fgcse optimization for the entire source file, but only when building for X86 using GCC with CONFIG_BPF_JIT_ALWAYS_ON disabled. Note that the original commit states that CONFIG_RETPOLINE=n triggers the issue, whereas CONFIG_RETPOLINE=y performs better without the optimization, so it is kept disabled in both cases. Fixes: 3193c0836f20 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") Signed-off-by: Ard Biesheuvel Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=GFHpzoj_hCoBQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20201028171506.15682-2-ardb@kernel.org --- include/linux/compiler-gcc.h | 2 -- include/linux/compiler_types.h | 4 ---- 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d1e3c6896b71..5deb37024574 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -175,5 +175,3 @@ #else #define __diag_GCC_8(s) #endif - -#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6e390d58a9f8..ac3fa37a84f9 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -247,10 +247,6 @@ struct ftrace_likely_data { #define asm_inline asm #endif -#ifndef __no_fgcse -# define __no_fgcse -#endif - /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -- cgit v1.2.3 From 0d519cbf38eed4f895aed197d4b135fa7f60f7c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Oct 2020 15:10:37 +0200 Subject: debugfs: remove return value of debugfs_create_devm_seqfile() No one checks the return value of debugfs_create_devm_seqfile(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Link: https://lore.kernel.org/r/20201023131037.2500765-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 851dd1f9a8a5..d6c4cc9ecc77 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -144,10 +144,9 @@ void debugfs_create_u32_array(const char *name, umode_t mode, struct dentry *parent, struct debugfs_u32_array *array); -struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, - struct dentry *parent, - int (*read_fn)(struct seq_file *s, - void *data)); +void debugfs_create_devm_seqfile(struct device *dev, const char *name, + struct dentry *parent, + int (*read_fn)(struct seq_file *s, void *data)); bool debugfs_initialized(void); @@ -327,13 +326,12 @@ static inline void debugfs_create_u32_array(const char *name, umode_t mode, { } -static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev, - const char *name, - struct dentry *parent, - int (*read_fn)(struct seq_file *s, - void *data)) +static inline void debugfs_create_devm_seqfile(struct device *dev, + const char *name, + struct dentry *parent, + int (*read_fn)(struct seq_file *s, + void *data)) { - return ERR_PTR(-ENODEV); } static inline ssize_t debugfs_read_file_bool(struct file *file, -- cgit v1.2.3 From 46d6c5ae953cc0be38efd0e469284df7c4328cf8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 29 Oct 2020 03:56:06 +0100 Subject: netfilter: use actual socket sk rather than skb sk when routing harder If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit(): /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing. One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truely available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes. So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality. So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 2 +- include/linux/netfilter_ipv6.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 082e2c41b7ff..5b70ca868bb1 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -16,7 +16,7 @@ struct ip_rt_info { u_int32_t mark; }; -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type); struct nf_queue_entry; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 9b67394471e1..48314ade1506 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -42,7 +42,7 @@ struct nf_ipv6_ops { #if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - int (*route_me_harder)(struct net *net, struct sk_buff *skb); + int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); @@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, #endif } -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); -static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) +static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); @@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) if (!v6_ops) return -EHOSTUNREACH; - return v6_ops->route_me_harder(net, skb); + return v6_ops->route_me_harder(net, sk, skb); #elif IS_BUILTIN(CONFIG_IPV6) - return ip6_route_me_harder(net, skb); + return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; #endif -- cgit v1.2.3 From c0391b6ab810381df632677a1dcbbbbd63d05b6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 29 Oct 2020 13:50:03 +0100 Subject: netfilter: nf_tables: missing validation from the abort path If userspace does not include the trailing end of batch message, then nfnetlink aborts the transaction. This allows to check that ruleset updates trigger no errors. After this patch, invoking this command from the prerouting chain: # nft -c add rule x y fib saddr . oif type local fails since oif is not supported there. This patch fixes the lack of rule validation from the abort/check path to catch configuration errors such as the one above. Fixes: a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation") Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 89016d08f6a2..f6267e2883f2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -24,6 +24,12 @@ struct nfnl_callback { const u_int16_t attr_count; /* number of nlattr's */ }; +enum nfnl_abort_action { + NFNL_ABORT_NONE = 0, + NFNL_ABORT_AUTOLOAD, + NFNL_ABORT_VALIDATE, +}; + struct nfnetlink_subsystem { const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ @@ -31,7 +37,8 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ struct module *owner; int (*commit)(struct net *net, struct sk_buff *skb); - int (*abort)(struct net *net, struct sk_buff *skb, bool autoload); + int (*abort)(struct net *net, struct sk_buff *skb, + enum nfnl_abort_action action); void (*cleanup)(struct net *net); bool (*valid_genid)(struct net *net, u32 genid); }; -- cgit v1.2.3 From 77f6c0b87479c4578ac0798fc249637092ac45a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Sep 2020 12:30:50 +0200 Subject: timekeeping: remove arch_gettimeoffset With Arm EBSA110 gone, nothing uses it any more, so the corresponding code and the Kconfig option can be removed. Acked-by: Thomas Gleixner Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann --- include/linux/time.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index b142cb5f5a53..16cf4522d6f3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -21,19 +21,6 @@ extern time64_t mktime64(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec); -/* Some architectures do not supply their own clocksource. - * This is mainly the case in architectures that get their - * inter-tick times by reading the counter on their interval - * timer. Since these timers wrap every tick, they're not really - * useful as clocksources. Wrapping them to act like one is possible - * but not very efficient. So we provide a callout these arches - * can implement for use with the jiffies clocksource to provide - * finer then tick granular time. - */ -#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET -extern u32 (*arch_gettimeoffset)(void); -#endif - #ifdef CONFIG_POSIX_TIMERS extern void clear_itimer(void); #else -- cgit v1.2.3 From b3550164a19d62e515af6cacb5a31f0b2b3f9501 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Sep 2020 15:21:43 +0200 Subject: timekeeping: add CONFIG_LEGACY_TIMER_TICK All platforms that currently do not use generic clockevents roughly call the same set of functions in their timer interrupts: xtime_update(), update_process_times() and profile_tick(), sometimes in a different sequence. Add a helper function that performs all three of them, to make the callers more uniform and simplify the interface. Reviewed-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7f7e4a3f4394..3670cb1670ff 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -12,6 +12,7 @@ extern int timekeeping_suspended; /* Architecture timer tick functions: */ extern void update_process_times(int user); extern void xtime_update(unsigned long ticks); +extern void legacy_timer_tick(unsigned long ticks); /* * Get and set timeofday -- cgit v1.2.3 From 56cc7b8acfb7c763f71c0492fa8da01dca7c1760 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Sep 2020 17:39:11 +0200 Subject: timekeeping: remove xtime_update There are no more users of xtime_update aside from legacy_timer_tick(), so fold it into that function and remove the declaration. update_process_times() is now only called inside of the kernel/time/ code, so the declaration can be moved there. Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann --- include/linux/timekeeping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3670cb1670ff..d47009611109 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -10,8 +10,6 @@ void timekeeping_init(void); extern int timekeeping_suspended; /* Architecture timer tick functions: */ -extern void update_process_times(int user); -extern void xtime_update(unsigned long ticks); extern void legacy_timer_tick(unsigned long ticks); /* -- cgit v1.2.3 From 290562075d4d9e85b7ff4104f9a634ffc3cccb69 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 27 Oct 2020 15:28:40 -0500 Subject: net/mlx5: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 651591a2965d..a092346c7b2d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5823,7 +5823,7 @@ struct mlx5_ifc_alloc_modify_header_context_in_bits { u8 reserved_at_68[0x10]; u8 num_of_actions[0x8]; - union mlx5_ifc_set_add_copy_action_in_auto_bits actions[0]; + union mlx5_ifc_set_add_copy_action_in_auto_bits actions[]; }; struct mlx5_ifc_dealloc_modify_header_context_out_bits { @@ -9761,7 +9761,7 @@ struct mlx5_ifc_mcda_reg_bits { u8 reserved_at_60[0x20]; - u8 data[0][0x20]; + u8 data[][0x20]; }; enum { -- cgit v1.2.3 From e38d86b354f96e3ed6f5e6c7dbb442d7107fc0bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 29 Oct 2020 15:05:08 +0800 Subject: sctp: add the error cause for new encapsulation port restart This patch is to add the function to make the abort chunk with the error cause for new encapsulation port restart, defined on Section 4.4 in draft-tuexen-tsvwg-sctp-udp-encaps-cons-03. v1->v2: - no change. v2->v3: - no need to call htons() when setting nep.cur_port/new_port. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- include/linux/sctp.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 76731230bbc5..bb1926589693 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -482,11 +482,13 @@ enum sctp_error { * 11 Restart of an association with new addresses * 12 User Initiated Abort * 13 Protocol Violation + * 14 Restart of an Association with New Encapsulation Port */ SCTP_ERROR_RESTART = cpu_to_be16(0x0b), SCTP_ERROR_USER_ABORT = cpu_to_be16(0x0c), SCTP_ERROR_PROTO_VIOLATION = cpu_to_be16(0x0d), + SCTP_ERROR_NEW_ENCAP_PORT = cpu_to_be16(0x0e), /* ADDIP Section 3.3 New Error Causes * @@ -793,4 +795,22 @@ enum { SCTP_FLOWLABEL_VAL_MASK = 0xfffff }; +/* UDP Encapsulation + * draft-tuexen-tsvwg-sctp-udp-encaps-cons-03.html#section-4-4 + * + * The error cause indicating an "Restart of an Association with + * New Encapsulation Port" + * + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cause Code = 14 | Cause Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Current Encapsulation Port | New Encapsulation Port | + * +-------------------------------+-------------------------------+ + */ +struct sctp_new_encap_port_hdr { + __be16 cur_port; + __be16 new_port; +}; + #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From 1887023a5e96f98249574e0785b7aa2e5742ca68 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Wed, 28 Oct 2020 11:15:40 -0600 Subject: net: phy: marvell: add special handling of Finisar modules with 88E1111 The Finisar FCLF8520P2BTL 1000BaseT SFP module uses a Marvel 88E1111 PHY with a modified PHY ID. Add support for this ID using the 88E1111 methods. By default these modules do not have 1000BaseX auto-negotiation enabled, which is not generally desirable with Linux networking drivers. Add handling to enable 1000BaseX auto-negotiation when these modules are used in 1000BaseX mode. Also, some special handling is required to ensure that 1000BaseT auto-negotiation is enabled properly when desired. Based on existing handling in the AMD xgbe driver and the information in the Finisar FAQ: https://www.finisar.com/sites/default/files/resources/an-2036_1000base-t_sfp_faqreve1.pdf Signed-off-by: Robert Hancock Reviewed-by: Russell King Link: https://lore.kernel.org/r/20201028171540.1700032-1-robert.hancock@calian.com Signed-off-by: Jakub Kicinski --- include/linux/marvell_phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index ff7b7607c8cf..52b1610eae68 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -25,6 +25,9 @@ #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 +/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ +#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 + /* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do * not have a model ID. So the switch driver traps reads to the ID2 * register and returns the switch family ID -- cgit v1.2.3 From 955062b03fa62b802a1ee34fbb04e39f7a70ae73 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 29 Oct 2020 01:38:31 +0200 Subject: net: bridge: mcast: add support for raw L2 multicast groups Extend the bridge multicast control and data path to configure routes for L2 (non-IP) multicast groups. The uapi struct br_mdb_entry union u is extended with another variant, mac_addr, which does not change the structure size, and which is valid when the proto field is zero. To be compatible with the forwarding code that is already in place, which acts as an IGMP/MLD snooping bridge with querier capabilities, we need to declare that for L2 MDB entries (for which there exists no such thing as IGMP/MLD snooping/querying), that there is always a querier. Otherwise, these entries would be flooded to all bridge ports and not just to those that are members of the L2 multicast group. Needless to say, only permanent L2 multicast groups can be installed on a bridge port. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201028233831.610076-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 556caed00258..b979005ea39c 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -25,6 +25,7 @@ struct br_ip { #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ip6; #endif + unsigned char mac_addr[ETH_ALEN]; } dst; __be16 proto; __u16 vid; -- cgit v1.2.3 From ccf0a4b7fc688561428290265e4effde41446668 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 29 Oct 2020 16:39:48 +0100 Subject: netfilter: ipset: Add bucketsize parameter to all hash types The parameter defines the upper limit in any hash bucket at adding new entries from userspace - if the limit would be exceeded, ipset doubles the hash size and rehashes. It means the set may consume more memory but gives faster evaluation at matching in the set. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ab192720e2d6..46d9a0c26c67 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -198,6 +198,9 @@ struct ip_set_region { u32 elements; /* Number of elements vs timeout */ }; +/* The max revision number supported by any set type + 1 */ +#define IPSET_REVISION_MAX 9 + /* The core set type structure */ struct ip_set_type { struct list_head list; @@ -215,6 +218,8 @@ struct ip_set_type { u8 family; /* Type revisions */ u8 revision_min, revision_max; + /* Revision-specific supported (create) flags */ + u8 create_flags[IPSET_REVISION_MAX+1]; /* Set features to control swapping */ u16 features; -- cgit v1.2.3 From 3ebc0ef06e4a78522e9d1488dcf61b7d8fcfb792 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 11 Sep 2020 16:33:42 +0200 Subject: serial: s3c: Update path of Samsung S3C machine file Correct the path to Samsung S3C24xx machine file, mentioned in documentation. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200911143343.498-2-krzk@kernel.org --- include/linux/serial_s3c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index 463ed28d2b27..ca2c5393dc6b 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -254,7 +254,7 @@ * serial port * * the pointer is setup by the machine specific initialisation from the - * arch/arm/mach-s3c2410/ directory. + * arch/arm/mach-s3c/ directory. */ struct s3c2410_uartcfg { -- cgit v1.2.3 From 3a096c2bda7d2f0c0866d466447c41bc4b8ecdec Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:32 +0200 Subject: iio: fix a kernel-doc markup A function has a different name between their prototype and its kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/46622c3bdcffb76e79719f0fe5011c2952960b32.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/trigger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index cad8325903f9..f930c4ccf378 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -97,7 +97,7 @@ static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig) } /** - * iio_device_set_drvdata() - Set trigger driver data + * iio_trigger_set_drvdata() - Set trigger driver data * @trig: IIO trigger structure * @data: Driver specific data * -- cgit v1.2.3 From d3fd65484c78102bac92f176c53537a7691a38b2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 29 Oct 2020 18:29:59 +0100 Subject: net: core: add dev_sw_netstats_tx_add Add dev_sw_netstats_tx_add(), complementing already existing dev_sw_netstats_rx_add(). Other than dev_sw_netstats_rx_add allow to pass the number of packets as function argument. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..568fab708ac6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2557,6 +2557,18 @@ static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int l u64_stats_update_end(&tstats->syncp); } +static inline void dev_sw_netstats_tx_add(struct net_device *dev, + unsigned int packets, + unsigned int len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += len; + tstats->tx_packets += packets; + u64_stats_update_end(&tstats->syncp); +} + static inline void dev_lstats_add(struct net_device *dev, unsigned int len) { struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); -- cgit v1.2.3 From 81b01894d792db8d4746c9c1b39c6e2a94fa0a04 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 29 Oct 2020 18:31:21 +0100 Subject: net: core: add devm_netdev_alloc_pcpu_stats We have netdev_alloc_pcpu_stats(), and we have devm_alloc_percpu(). Add a managed version of netdev_alloc_pcpu_stats, e.g. for allocating the per-cpu stats in the probe() callback of a driver. It needs to be a macro for dealing properly with the type argument. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 568fab708ac6..a53ed2d1ed1d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2596,6 +2596,20 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL) +#define devm_netdev_alloc_pcpu_stats(dev, type) \ +({ \ + typeof(type) __percpu *pcpu_stats = devm_alloc_percpu(dev, type);\ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ +}) + enum netdev_lag_tx_type { NETDEV_LAG_TX_TYPE_UNKNOWN, NETDEV_LAG_TX_TYPE_RANDOM, -- cgit v1.2.3 From 1c552e08b29895d31bbf82bdb549811cfde31db4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:02 -0700 Subject: firmware: ti_sci: rm: Add support for tx_tdtype parameter for tx channel The system controller's resource manager have support for configuring the TDTYPE of TCHAN_CFG register on j721e. With this parameter the teardown completion can be controlled: TDTYPE == 0: Return without waiting for peer to complete the teardown TDTYPE == 1: Wait for peer to complete the teardown Signed-off-by: Peter Ujfalusi Reviewed-by: Tero Kristo Tested-by: Keerthy Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index cf27b080e148..d254d99fd45b 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -345,6 +345,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID BIT(11) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_CREDIT_COUNT_VALID BIT(12) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FDEPTH_VALID BIT(13) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID BIT(15) u16 nav_id; u16 index; u8 tx_pause_on_err; @@ -362,6 +363,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { u16 fdepth; u8 tx_sched_priority; u8 tx_burst_size; + u8 tx_tdtype; }; /** -- cgit v1.2.3 From 967a020bd3deddf9a0af73aeb4d4b46d90030937 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:03 -0700 Subject: firmware: ti_sci: Use struct ti_sci_resource_desc in get_range ops Use the ti_sci_resource_desc directly and update it's start and num members directly instead of requiring individual parameters for them. This will allow easy extension of the RM parameters without changing API. Signed-off-by: Peter Ujfalusi Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index d254d99fd45b..6cd537db4d33 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -195,6 +195,18 @@ struct ti_sci_clk_ops { u64 *current_freq); }; +/** + * struct ti_sci_resource_desc - Description of TI SCI resource instance range. + * @start: Start index of the resource. + * @num: Number of resources. + * @res_map: Bitmap to manage the allocation of these resources. + */ +struct ti_sci_resource_desc { + u16 start; + u16 num; + unsigned long *res_map; +}; + /** * struct ti_sci_rm_core_ops - Resource management core operations * @get_range: Get a range of resources belonging to ti sci host. @@ -209,15 +221,15 @@ struct ti_sci_clk_ops { * - dev_id: TISCI device ID. * - subtype: Resource assignment subtype that is being requested * from the given device. - * - range_start: Start index of the resource range - * - range_end: Number of resources in the range + * - desc: Pointer to ti_sci_resource_desc to be updated with the resource + * range start index and number of resources */ struct ti_sci_rm_core_ops { int (*get_range)(const struct ti_sci_handle *handle, u32 dev_id, - u8 subtype, u16 *range_start, u16 *range_num); + u8 subtype, struct ti_sci_resource_desc *desc); int (*get_range_from_shost)(const struct ti_sci_handle *handle, u32 dev_id, u8 subtype, u8 s_host, - u16 *range_start, u16 *range_num); + struct ti_sci_resource_desc *desc); }; #define TI_SCI_RESASG_SUBTYPE_IR_OUTPUT 0 @@ -522,18 +534,6 @@ struct ti_sci_handle { #define TI_SCI_RESOURCE_NULL 0xffff -/** - * struct ti_sci_resource_desc - Description of TI SCI resource instance range. - * @start: Start index of the resource. - * @num: Number of resources. - * @res_map: Bitmap to manage the allocation of these resources. - */ -struct ti_sci_resource_desc { - u16 start; - u16 num; - unsigned long *res_map; -}; - /** * struct ti_sci_resource - Structure representing a resource assigned * to a device. -- cgit v1.2.3 From 519c5c0c558b529f835c9bb30f9a1eb2034d585c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:03 -0700 Subject: firmware: ti_sci: rm: Add support for second resource range Sysfw added support for a second range in the resource range API to be able to describe complex allocations mainly for DMA channels. Update the ti_sci part to consider the second range as well. Signed-off-by: Peter Ujfalusi Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 6cd537db4d33..9699b260de59 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -197,13 +197,17 @@ struct ti_sci_clk_ops { /** * struct ti_sci_resource_desc - Description of TI SCI resource instance range. - * @start: Start index of the resource. - * @num: Number of resources. + * @start: Start index of the first resource range. + * @num: Number of resources in the first range. + * @start_sec: Start index of the second resource range. + * @num_sec: Number of resources in the second range. * @res_map: Bitmap to manage the allocation of these resources. */ struct ti_sci_resource_desc { u16 start; u16 num; + u16 start_sec; + u16 num_sec; unsigned long *res_map; }; -- cgit v1.2.3 From ce1feed58534d8489afb4900bee75dff15d950e0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:05 -0700 Subject: firmware: ti_sci: rm: Add support for extended_ch_type for tx channel Sysfw added 'extended_ch_type' to the tx_ch_cfg_req message which should be used when BCDMA block copy channels are configured: extended_ch_type = 0 : the channel is split tx channel (tchan) extended_ch_type = 1 : the channel is block copy channel (bchan) Signed-off-by: Peter Ujfalusi Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 9699b260de59..6978afc00823 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -336,6 +336,9 @@ struct ti_sci_rm_psil_ops { #define TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES 2 #define TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES 3 +#define TI_SCI_RM_BCDMA_EXTENDED_CH_TYPE_TCHAN 0 +#define TI_SCI_RM_BCDMA_EXTENDED_CH_TYPE_BCHAN 1 + /* UDMAP TX/RX channel valid_params common declarations */ #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID BIT(0) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID BIT(1) @@ -362,6 +365,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_CREDIT_COUNT_VALID BIT(12) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FDEPTH_VALID BIT(13) #define TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID BIT(15) +#define TI_SCI_MSG_VALUE_RM_UDMAP_CH_EXTENDED_CH_TYPE_VALID BIT(16) u16 nav_id; u16 index; u8 tx_pause_on_err; @@ -380,6 +384,7 @@ struct ti_sci_msg_rm_udmap_tx_ch_cfg { u8 tx_sched_priority; u8 tx_burst_size; u8 tx_tdtype; + u8 extended_ch_type; }; /** -- cgit v1.2.3 From 4d8ddf673a420aa25668eceeb4fbf33e2521fdf2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:05 -0700 Subject: firmware: ti_sci: rm: Remove ring_get_config support The ring_get_cfg (0x1111 message) is not used and it is not supported by sysfw for a long time. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 6978afc00823..6710d7ac7a72 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -286,8 +286,6 @@ struct ti_sci_rm_irq_ops { /** * struct ti_sci_rm_ringacc_ops - Ring Accelerator Management operations * @config: configure the SoC Navigator Subsystem Ring Accelerator ring - * @get_config: get the SoC Navigator Subsystem Ring Accelerator ring - * configuration */ struct ti_sci_rm_ringacc_ops { int (*config)(const struct ti_sci_handle *handle, @@ -295,10 +293,6 @@ struct ti_sci_rm_ringacc_ops { u32 addr_lo, u32 addr_hi, u32 count, u8 mode, u8 size, u8 order_id ); - int (*get_config)(const struct ti_sci_handle *handle, - u32 nav_id, u32 index, u8 *mode, - u32 *addr_lo, u32 *addr_hi, u32 *count, - u8 *size, u8 *order_id); }; /** -- cgit v1.2.3 From 3c2017536f3a122bf246cc87f9327e9ec138db92 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:06 -0700 Subject: firmware: ti_sci: rm: Add new ops for ring configuration The sysfw ring configuration message has been extended to include virtid and asel value for the ring. Add the ASEL_VALID to TI_SCI_MSG_VALUE_RM_ALL_NO_ORDER as it is required for DMA rings. Instead of extending the current .config() ops - which would need same patch change in the ringacc driver - add ti_sci_msg_rm_ring_cfg struct and a new ops using it to configure the ring. This will allow easy update path in case new members are added for the ring configuration. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 6710d7ac7a72..d1711050cd9d 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -275,17 +275,44 @@ struct ti_sci_rm_irq_ops { #define TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID BIT(4) /* RA config.order_id parameter is valid for RM ring configure TISCI message */ #define TI_SCI_MSG_VALUE_RM_RING_ORDER_ID_VALID BIT(5) +/* RA config.virtid parameter is valid for RM ring configure TISCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_VIRTID_VALID BIT(6) +/* RA config.asel parameter is valid for RM ring configure TISCI message */ +#define TI_SCI_MSG_VALUE_RM_RING_ASEL_VALID BIT(7) #define TI_SCI_MSG_VALUE_RM_ALL_NO_ORDER \ (TI_SCI_MSG_VALUE_RM_RING_ADDR_LO_VALID | \ TI_SCI_MSG_VALUE_RM_RING_ADDR_HI_VALID | \ TI_SCI_MSG_VALUE_RM_RING_COUNT_VALID | \ TI_SCI_MSG_VALUE_RM_RING_MODE_VALID | \ - TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID) + TI_SCI_MSG_VALUE_RM_RING_SIZE_VALID | \ + TI_SCI_MSG_VALUE_RM_RING_ASEL_VALID) + +/** + * struct ti_sci_msg_rm_ring_cfg - Ring configuration + * + * Parameters for Navigator Subsystem ring configuration + * See @ti_sci_msg_rm_ring_cfg_req + */ +struct ti_sci_msg_rm_ring_cfg { + u32 valid_params; + u16 nav_id; + u16 index; + u32 addr_lo; + u32 addr_hi; + u32 count; + u8 mode; + u8 size; + u8 order_id; + u16 virtid; + u8 asel; +}; /** * struct ti_sci_rm_ringacc_ops - Ring Accelerator Management operations * @config: configure the SoC Navigator Subsystem Ring Accelerator ring + * Deprecated + * @set_cfg: configure the SoC Navigator Subsystem Ring Accelerator ring */ struct ti_sci_rm_ringacc_ops { int (*config)(const struct ti_sci_handle *handle, @@ -293,6 +320,8 @@ struct ti_sci_rm_ringacc_ops { u32 addr_lo, u32 addr_hi, u32 count, u8 mode, u8 size, u8 order_id ); + int (*set_cfg)(const struct ti_sci_handle *handle, + const struct ti_sci_msg_rm_ring_cfg *params); }; /** -- cgit v1.2.3 From fed7552f1e69296461fca62ebaa0bb5a06fec0df Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:07 -0700 Subject: firmware: ti_sci: rm: Remove unused config() from ti_sci_rm_ringacc_ops The ringacc driver has been converted to use the new set_cfg function to configure the ring, the old config ops can be removed. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/ti_sci_protocol.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index d1711050cd9d..0aad7009b50e 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -310,16 +310,9 @@ struct ti_sci_msg_rm_ring_cfg { /** * struct ti_sci_rm_ringacc_ops - Ring Accelerator Management operations - * @config: configure the SoC Navigator Subsystem Ring Accelerator ring - * Deprecated * @set_cfg: configure the SoC Navigator Subsystem Ring Accelerator ring */ struct ti_sci_rm_ringacc_ops { - int (*config)(const struct ti_sci_handle *handle, - u32 valid_params, u16 nav_id, u16 index, - u32 addr_lo, u32 addr_hi, u32 count, u8 mode, - u8 size, u8 order_id - ); int (*set_cfg)(const struct ti_sci_handle *handle, const struct ti_sci_msg_rm_ring_cfg *params); }; -- cgit v1.2.3 From 8c42379e40e2db4199ceeb6a6ef9fff73ff132cf Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sun, 25 Oct 2020 12:10:22 -0700 Subject: soc: ti: k3-ringacc: Use correct device for allocation in RING mode In RING mode the ringacc does not access the ring memory. In this access mode the ringacc coherency does not have meaning. If the ring is configured in RING mode, then the ringacc itself will not access to the ring memory. Only the requester (user) of the ring is going to read/write to the memory. Extend the ring configuration parameters with a device pointer to be used for DMA API when the ring is configured in RING mode. Extending the ring configuration struct will allow per ring selection of device to be used for allocation, thus allowing per ring coherency. To avoid regression, fall back to use the ringacc dev in case the alloc_dev is not provided. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar --- include/linux/soc/ti/k3-ringacc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 5a472eca5ee4..658dc71d2901 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -67,6 +67,9 @@ struct k3_ring; * few times. It's usable when the same ring is used as Free Host PD ring * for different flows, for example. * Note: Locking should be done by consumer if required + * @dma_dev: Master device which is using and accessing to the ring + * memory when the mode is K3_RINGACC_RING_MODE_RING. Memory allocations + * should be done using this device. */ struct k3_ring_cfg { u32 size; @@ -74,6 +77,8 @@ struct k3_ring_cfg { enum k3_ring_mode mode; #define K3_RINGACC_RING_SHARED BIT(1) u32 flags; + + struct device *dma_dev; }; #define K3_RINGACC_RING_ID_ANY (-1) -- cgit v1.2.3 From f51778db088b2407ec177f2f4da0f6290602aa3f Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 2 Nov 2020 12:43:27 +1100 Subject: swiotlb: using SIZE_MAX needs limits.h included After merging the drm-misc tree, linux-next build (arm multi_v7_defconfig) failed like this: In file included from drivers/gpu/drm/nouveau/nouveau_ttm.c:26: include/linux/swiotlb.h: In function 'swiotlb_max_mapping_size': include/linux/swiotlb.h:99:9: error: 'SIZE_MAX' undeclared (first use in this function) 99 | return SIZE_MAX; | ^~~~~~~~ include/linux/swiotlb.h:7:1: note: 'SIZE_MAX' is defined in header ''; did you forget to '#include '? 6 | #include +++ |+#include 7 | #include include/linux/swiotlb.h:99:9: note: each undeclared identifier is reported only once for each function it appears in 99 | return SIZE_MAX; | ^~~~~~~~ Caused by commit abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") but only exposed by commit "drm/nouveu: fix swiotlb include" Fix it by including linux/limits.h as appropriate. Fixes: abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") Signed-off-by: Stephen Rothwell Link: https://lore.kernel.org/r/20201102124327.2f82b2a7@canb.auug.org.au Signed-off-by: Michael S. Tsirkin --- include/linux/swiotlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 046bb94bd4d6..fa5122c6711e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -5,6 +5,7 @@ #include #include #include +#include struct device; struct page; -- cgit v1.2.3 From 7a60c2dd0f575ab14a457e99582af0ca1e072a74 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 28 Oct 2020 16:15:26 -0300 Subject: drm: Remove SCATTERLIST_MAX_SEGMENT Since commit 9a40401cfa13 ("lib/scatterlist: Do not limit max_segment to PAGE_ALIGNED values") the max_segment input to sg_alloc_table_from_pages() does not have to be any special value. The new algorithm will always create something less than what the user provides. Thus eliminate this confusing constant. - vmwgfx should use the HW capability, not mix in the OS page size for calling dma_set_max_seg_size() - i915 uses i915_sg_segment_size() both for sg_alloc_table_from_pages and for some open coded sgl construction. This doesn't change the value since rounddown(size, UINT_MAX) == SCATTERLIST_MAX_SEGMENT - drm_prime_pages_to_sg uses it as a default if max_segment is zero, UINT_MAX is fine to use directly. Cc: Gerd Hoffmann Cc: Daniel Vetter Cc: Thomas Hellstrom Cc: Qian Cai Cc: "Ursulin, Tvrtko" Suggested-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/0-v1-44733fccd781+13d-rm_scatterlist_max_jgg@nvidia.com --- include/linux/scatterlist.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 36c47e7e66a2..6f70572b2938 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -18,12 +18,6 @@ struct scatterlist { #endif }; -/* - * Since the above length field is an unsigned int, below we define the maximum - * length in bytes that can be stored in one scatterlist entry. - */ -#define SCATTERLIST_MAX_SEGMENT (UINT_MAX & PAGE_MASK) - /* * These macros should be used after a dma_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. -- cgit v1.2.3 From fc0021aa340af65a0a37d77be39e22aa886a6132 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Oct 2020 08:33:09 +0200 Subject: swiotlb: remove the tbl_dma_addr argument to swiotlb_tbl_map_single The tbl_dma_addr argument is used to check the DMA boundary for the allocations, and thus needs to be a dma_addr_t. swiotlb-xen instead passed a physical address, which could lead to incorrect results for strange offsets. Fix this by removing the parameter entirely and hard code the DMA address for io_tlb_start instead. Fixes: 91ffe4ad534a ("swiotlb-xen: introduce phys_to_dma/dma_to_phys translations") Signed-off-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 513913ff7486..3bb72266a75a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -45,13 +45,9 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, }; -extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t phys, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs); +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, -- cgit v1.2.3 From e0e398e204634db8fb71bd89cf2f6e3e5bd09b51 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:12:15 +0200 Subject: PM: runtime: Drop runtime PM references to supplier on link removal While removing a device link, drop the supplier device's runtime PM usage counter as many times as needed to drop all of the runtime PM references to it from the consumer in addition to dropping the consumer's link count. Fixes: baa8809f6097 ("PM / runtime: Optimize the use of device links") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_runtime.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 18b02dcc168e..eadc1fdebce6 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,7 +58,7 @@ extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); -extern void pm_runtime_drop_link(struct device *dev); +extern void pm_runtime_drop_link(struct device_link *link); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. @@ -280,7 +280,7 @@ static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -static inline void pm_runtime_drop_link(struct device *dev) {} +static inline void pm_runtime_drop_link(struct device_link *link) {} #endif /* !CONFIG_PM */ -- cgit v1.2.3 From d6e36668598154820177bfd78c1621d8e6c580a2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:13:10 +0200 Subject: PM: runtime: Drop pm_runtime_clean_up_links() After commit d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") nothing prevents the consumer device's runtime PM from acquiring additional references to the supplier device after pm_runtime_clean_up_links() has run (or even while it is running), so calling this function from __device_release_driver() may be pointless (or even harmful). Moreover, it ignores stateless device links, so the runtime PM handling of managed and stateless device links is inconsistent because of it, so better get rid of it entirely. Fixes: d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_runtime.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index eadc1fdebce6..4b708f4e8eed 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -54,7 +54,6 @@ extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); -extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); @@ -276,7 +275,6 @@ static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} -static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -- cgit v1.2.3 From 56a7ff75cd08987812209971e319f78156ea2bb1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Oct 2020 13:57:45 +0200 Subject: cpufreq: Drop restore_freq from struct cpufreq_policy The restore_freq field in struct cpufreq_policy is only used by __target_index() in one place and a local variable in that function may as well be used instead of it, so drop it and modify __target_index() accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9779a6cd8baa 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,7 +65,6 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ - unsigned int restore_freq; /* = policy->cur before transition */ unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ @@ -308,10 +307,6 @@ struct cpufreq_driver { /* define one out of two */ int (*setpolicy)(struct cpufreq_policy *policy); - /* - * On failure, should always restore frequency to policy->restore_freq - * (i.e. old freq). - */ int (*target)(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); /* Deprecated */ -- cgit v1.2.3 From b000d5cb954fe25ac1ea929ae6da321033ace927 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Oct 2020 10:18:04 +0200 Subject: ima: defer arch_ima_get_secureboot() call to IMA init time Chester reports that it is necessary to introduce a new way to pass the EFI secure boot status between the EFI stub and the core kernel on ARM systems. The usual way of obtaining this information is by checking the SecureBoot and SetupMode EFI variables, but this can only be done after the EFI variable workqueue is created, which occurs in a subsys_initcall(), whereas arch_ima_get_secureboot() is called much earlier by the IMA framework. However, the IMA framework itself is started as a late_initcall, and the only reason the call to arch_ima_get_secureboot() occurs so early is because it happens in the context of a __setup() callback that parses the ima_appraise= command line parameter. So let's refactor this code a little bit, by using a core_param() callback to capture the command line argument, and deferring any reasoning based on its contents to the IMA init routine. Cc: Chester Lin Cc: Dmitry Kasatkin Cc: James Morris Cc: "Serge E. Hallyn" Link: https://lore.kernel.org/linux-arm-kernel/20200904072905.25332-2-clin@suse.com/ Signed-off-by: Ard Biesheuvel Reported-by: kernel test robot [missing core_param()] [zohar@linux.ibm.com: included linux/module.h] Tested-by: Chester Lin Signed-off-by: Mimi Zohar --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 8fa7bcfb2da2..ac3d82f962f2 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -31,6 +31,12 @@ extern void ima_post_path_mknod(struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); +#ifdef CONFIG_IMA_APPRAISE_BOOTPARAM +extern void ima_appraise_parse_cmdline(void); +#else +static inline void ima_appraise_parse_cmdline(void) {} +#endif + #ifdef CONFIG_IMA_KEXEC extern void ima_add_kexec_buffer(struct kimage *image); #endif -- cgit v1.2.3 From 24269999027e6b161c0078ad9c1557f9a1575128 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:32:57 +0200 Subject: EDAC: Fix some kernel-doc markups Kernel-doc markup should use this format: identifier - description Correct that and also fix some enums' names in the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/1d291393ba58c7b80908a3fedf02d2f53921ffe9.1603469755.git.mchehab+huawei@kernel.org --- include/linux/edac.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 15e8f3d8a895..52d7487f6bd4 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -229,7 +229,7 @@ enum mem_type { #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) /** - * enum edac-type - Error Detection and Correction capabilities and mode + * enum edac_type - Error Detection and Correction capabilities and mode * @EDAC_UNKNOWN: Unknown if ECC is available * @EDAC_NONE: Doesn't support ECC * @EDAC_RESERVED: Reserved ECC type @@ -309,7 +309,7 @@ enum scrub_type { #define OP_OFFLINE 0x300 /** - * enum edac_mc_layer - memory controller hierarchy layer + * enum edac_mc_layer_type - memory controller hierarchy layer * * @EDAC_MC_LAYER_BRANCH: memory layer is named "branch" * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" -- cgit v1.2.3 From f8f6ae5d077a9bdaf5cbf2ac960a5d1a04b47482 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 1 Nov 2020 17:08:00 -0800 Subject: mm: always have io_remap_pfn_range() set pgprot_decrypted() The purpose of io_remap_pfn_range() is to map IO memory, such as a memory mapped IO exposed through a PCI BAR. IO devices do not understand encryption, so this memory must always be decrypted. Automatically call pgprot_decrypted() as part of the generic implementation. This fixes a bug where enabling AMD SME causes subsystems, such as RDMA, using io_remap_pfn_range() to expose BAR pages to user space to fail. The CPU will encrypt access to those BAR pages instead of passing unencrypted IO directly to the device. Places not mapping IO should use remap_pfn_range(). Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Tom Lendacky Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brijesh Singh Cc: Jonathan Corbet Cc: Dmitry Vyukov Cc: "Dave Young" Cc: Alexander Potapenko Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Larry Woodman Cc: Matt Fleming Cc: Ingo Molnar Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Toshimitsu Kani Cc: Link: https://lkml.kernel.org/r/0-v1-025d64bdf6c4+e-amd_sme_fix_jgg@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ include/linux/pgtable.h | 4 ---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ef360fe70aaf..db6ae4d3fb4e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2759,6 +2759,15 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +#ifndef io_remap_pfn_range +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); +} +#endif + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 38c33eabea89..71125a4676c4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,10 +1427,6 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit v1.2.3 From 9f14cb030d987ae5e201e88cd345c6d772bcce51 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:22 -0700 Subject: sched: Un-hide lockdep_tasklist_lock_is_held() for !LOCKDEP Currently, variables used only within lockdep expressions are flagged as unused, requiring that these variables' declarations be decorated with either #ifdef or __maybe_unused. This results in ugly code. This commit therefore causes the lockdep_tasklist_lock_is_held() function to be visible even when lockdep is not enabled, thus removing the need for these decorations. This approach further relies on dead-code elimination to remove any references to functions or variables that are not available in non-lockdep kernels. Signed-off-by: Jakub Kicinski Signed-off-by: Paul E. McKenney --- include/linux/sched/task.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 85fb2f34c59b..c0f71f2e7160 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -47,9 +47,7 @@ extern spinlock_t mmlist_lock; extern union thread_union init_thread_union; extern struct task_struct init_task; -#ifdef CONFIG_PROVE_RCU extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); -- cgit v1.2.3 From 891cd1f99dd94746f0caf5eea0121079178ee9bf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:23 -0700 Subject: rcu: Un-hide lockdep maps for !LOCKDEP Currently, variables used only within lockdep expressions are flagged as unused, requiring that these variables' declarations be decorated with either #ifdef or __maybe_unused. This results in ugly code. This commit therefore causes the RCU lock maps to be visible even when lockdep is not enabled, thus removing the need for these decorations. This approach further relies on dead-code elimination to remove any references to functions or variables that are not available in non-lockdep kernels. Signed-off-by: Jakub Kicinski Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 9 +++++---- include/linux/rcupdate_trace.h | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdd0152c253..f9533bbcbb36 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -241,6 +241,11 @@ bool rcu_lockdep_current_cpu_online(void); static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ +extern struct lockdep_map rcu_lock_map; +extern struct lockdep_map rcu_bh_lock_map; +extern struct lockdep_map rcu_sched_lock_map; +extern struct lockdep_map rcu_callback_map; + #ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void rcu_lock_acquire(struct lockdep_map *map) @@ -253,10 +258,6 @@ static inline void rcu_lock_release(struct lockdep_map *map) lock_release(map, _THIS_IP_); } -extern struct lockdep_map rcu_lock_map; -extern struct lockdep_map rcu_bh_lock_map; -extern struct lockdep_map rcu_sched_lock_map; -extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 3e7919fc5f34..86c8f6c98412 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -11,10 +11,10 @@ #include #include -#ifdef CONFIG_DEBUG_LOCK_ALLOC - extern struct lockdep_map rcu_trace_lock_map; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + static inline int rcu_read_lock_trace_held(void) { return lock_is_held(&rcu_trace_lock_map); -- cgit v1.2.3 From cd539cff9470fe1dacf0bf5ab3f54f37b854d6fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:27 -0700 Subject: lockdep: Provide dummy forward declaration of *_is_held() helpers When CONFIG_LOCKDEP is not set, lock_is_held() and lockdep_is_held() are not declared or defined. This forces all callers to use #ifdefs around these checks. Recent RCU changes added a lot of lockdep_is_held() calls inside rcu_dereference_protected(). This macro hides its argument on !LOCKDEP builds, which can lead to false-positive unused-variable warnings. This commit therefore provides forward declarations of lock_is_held() and lockdep_is_held() but without defining them. This way callers (including those internal to RCU) can keep them visible to the compiler on !LOCKDEP builds and instead depend on dead code elimination to remove the references, which in turn prevents the linker from complaining about the lack of the corresponding function definitions. [ paulmck: Apply Peter Zijlstra feedback on "extern". ] Signed-off-by: Jakub Kicinski -- CC: peterz@infradead.org CC: mingo@redhat.com CC: will@kernel.org Signed-off-by: Paul E. McKenney --- include/linux/lockdep.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f5594879175a..ccc3ce66c7e0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -375,6 +375,12 @@ static inline void lockdep_unregister_key(struct lock_class_key *key) #define lockdep_depth(tsk) (0) +/* + * Dummy forward declarations, allow users to write less ifdef-y code + * and depend on dead code elimination. + */ +extern int lock_is_held(const void *); +extern int lockdep_is_held(const void *); #define lockdep_is_held_type(l, r) (1) #define lockdep_assert_held(l) do { (void)(l); } while (0) -- cgit v1.2.3 From 65e9eb1ccfe56b41a0d8bfec651ea014968413cb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 16 Sep 2020 11:45:28 -0700 Subject: rcu: Prevent RCU_LOCKDEP_WARN() from swallowing the condition We run into a unused variable warning in bridge code when variable is only used inside the condition of rcu_dereference_protected(). #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) Since on builds with CONFIG_PROVE_RCU=n rcu_dereference_protected() compiles to nothing the compiler doesn't see the variable use. This commit therefore prevents this warning by adding the condition as dead code. Signed-off-by: Jakub Kicinski -- CC: paulmck@kernel.org CC: josh@joshtriplett.org CC: rostedt@goodmis.org CC: mathieu.desnoyers@efficios.com CC: joel@joelfernandes.org CC: jiangshanlai@gmail.com Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f9533bbcbb36..de0826411311 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -328,7 +328,7 @@ static inline void rcu_preempt_sleep_check(void) { } #else /* #ifdef CONFIG_PROVE_RCU */ -#define RCU_LOCKDEP_WARN(c, s) do { } while (0) +#define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ -- cgit v1.2.3 From 6370cc3bbd8a0f9bf975b013781243ab147876c6 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 29 Oct 2020 17:36:19 +0000 Subject: net: add kcov handle to skb extensions Remote KCOV coverage collection enables coverage-guided fuzzing of the code that is not reachable during normal system call execution. It is especially helpful for fuzzing networking subsystems, where it is common to perform packet handling in separate work queues even for the packets that originated directly from the user space. Enable coverage-guided frame injection by adding kcov remote handle to skb extensions. Default initialization in __alloc_skb and __build_skb_around ensures that no socket buffer that was generated during a system call will be missed. Code that is of interest and that performs packet processing should be annotated with kcov_remote_start()/kcov_remote_stop(). An alternative approach is to determine kcov_handle solely on the basis of the device/interface that received the specific socket buffer. However, in this case it would be impossible to distinguish between packets that originated during normal background network processes or were intentionally injected from the user space. Signed-off-by: Aleksandr Nogikh Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a828cf99c521..2d01b2bbb746 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4150,6 +4150,9 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, +#endif +#if IS_ENABLED(CONFIG_KCOV) + SKB_EXT_KCOV_HANDLE, #endif SKB_EXT_NUM, /* must be last */ }; @@ -4605,5 +4608,35 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } +#ifdef CONFIG_KCOV +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 kcov_handle) +{ + /* Do not allocate skb extensions only to set kcov_handle to zero + * (as it is zero by default). However, if the extensions are + * already allocated, update kcov_handle anyway since + * skb_set_kcov_handle can be called to zero a previously set + * value. + */ + if (skb_has_extensions(skb) || kcov_handle) { + u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); + + if (kcov_handle_ptr) + *kcov_handle_ptr = kcov_handle; + } +} + +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) +{ + u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); + + return kcov_handle ? *kcov_handle : 0; +} +#else +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 kcov_handle) { } +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } +#endif /* CONFIG_KCOV */ + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 31909e3330c8d65e9a928d40833ebd5feb4f64e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Tue, 27 Oct 2020 21:42:56 +0100 Subject: timens: additional helper functions for boottime offset handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide functions for time_namespace to subtract the boottime offset from a timespec64 as well as to apply the boottime offset to u64 types in nanoseconds. Signed-off-by: Michael Weiß Reviewed-by: Andrei Vagin Acked-by: Thomas Gleixner Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20201027204258.7869-2-michael.weiss@aisec.fraunhofer.de --- include/linux/time_namespace.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 5b6031385db0..68770ac9ba89 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -77,6 +77,20 @@ static inline void timens_add_boottime(struct timespec64 *ts) *ts = timespec64_add(*ts, ns_offsets->boottime); } +static inline u64 timens_add_boottime_ns(u64 nsec) +{ + struct timens_offsets *ns_offsets = ¤t->nsproxy->time_ns->offsets; + + return nsec + timespec64_to_ns(&ns_offsets->boottime); +} + +static inline void timens_sub_boottime(struct timespec64 *ts) +{ + struct timens_offsets *ns_offsets = ¤t->nsproxy->time_ns->offsets; + + *ts = timespec64_sub(*ts, ns_offsets->boottime); +} + ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, struct timens_offsets *offsets); @@ -130,6 +144,14 @@ static inline int timens_on_fork(struct nsproxy *nsproxy, static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { } + +static inline u64 timens_add_boottime_ns(u64 nsec) +{ + return nsec; +} + +static inline void timens_sub_boottime(struct timespec64 *ts) { } + static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) { return tim; -- cgit v1.2.3 From 5190db9fdd20fa5ba6084c98a3bc71c2fdf6a871 Mon Sep 17 00:00:00 2001 From: Roman Anufriev Date: Sun, 18 Oct 2020 05:56:54 +0300 Subject: fs/quota: update quota state flags scheme with project quota flags Current quota state flags scheme doesn't include project quota and thus shows all flags after DQUOT_USAGE_ENABLED wrong. Fix this and also add DQUOT_NOLIST_DIRTY to the scheme. Link: https://lore.kernel.org/r/1602989814-28922-1-git-send-email-dotdot@yandex-team.ru Signed-off-by: Roman Anufriev Signed-off-by: Jan Kara --- include/linux/quota.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 27aab84fcbaa..18ebd39c9487 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -448,17 +448,18 @@ struct quota_format_type { }; /** - * Quota state flags - they actually come in two flavors - for users and groups. + * Quota state flags - they come in three flavors - for users, groups and projects. * * Actual typed flags layout: - * USRQUOTA GRPQUOTA - * DQUOT_USAGE_ENABLED 0x0001 0x0002 - * DQUOT_LIMITS_ENABLED 0x0004 0x0008 - * DQUOT_SUSPENDED 0x0010 0x0020 + * USRQUOTA GRPQUOTA PRJQUOTA + * DQUOT_USAGE_ENABLED 0x0001 0x0002 0x0004 + * DQUOT_LIMITS_ENABLED 0x0008 0x0010 0x0020 + * DQUOT_SUSPENDED 0x0040 0x0080 0x0100 * * Following bits are used for non-typed flags: - * DQUOT_QUOTA_SYS_FILE 0x0040 - * DQUOT_NEGATIVE_USAGE 0x0080 + * DQUOT_QUOTA_SYS_FILE 0x0200 + * DQUOT_NEGATIVE_USAGE 0x0400 + * DQUOT_NOLIST_DIRTY 0x0800 */ enum { _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */ -- cgit v1.2.3 From 286228d382ba6320f04fa2e7c6fc8d4d92e428f4 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 18 Dec 2019 09:39:02 +0100 Subject: can: can_create_echo_skb(): fix echo skb generation: always use skb_clone() All user space generated SKBs are owned by a socket (unless injected into the key via AF_PACKET). If a socket is closed, all associated skbs will be cleaned up. This leads to a problem when a CAN driver calls can_put_echo_skb() on a unshared SKB. If the socket is closed prior to the TX complete handler, can_get_echo_skb() and the subsequent delivering of the echo SKB to all registered callbacks, a SKB with a refcount of 0 is delivered. To avoid the problem, in can_get_echo_skb() the original SKB is now always cloned, regardless of shared SKB or not. If the process exists it can now safely discard its SKBs, without disturbing the delivery of the echo SKB. The problem shows up in the j1939 stack, when it clones the incoming skb, which detects the already 0 refcount. We can easily reproduce this with following example: testj1939 -B -r can0: & cansend can0 1823ff40#0123 WARNING: CPU: 0 PID: 293 at lib/refcount.c:25 refcount_warn_saturate+0x108/0x174 refcount_t: addition on 0; use-after-free. Modules linked in: coda_vpu imx_vdoa videobuf2_vmalloc dw_hdmi_ahb_audio vcan CPU: 0 PID: 293 Comm: cansend Not tainted 5.5.0-rc6-00376-g9e20dcb7040d #1 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x8c/0xa0) [] (dump_stack) from [] (__warn+0xe0/0x108) [] (__warn) from [] (warn_slowpath_fmt+0xa8/0xcc) [] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x108/0x174) [] (refcount_warn_saturate) from [] (j1939_can_recv+0x20c/0x210) [] (j1939_can_recv) from [] (can_rcv_filter+0xb4/0x268) [] (can_rcv_filter) from [] (can_receive+0xb0/0xe4) [] (can_receive) from [] (can_rcv+0x48/0x98) [] (can_rcv) from [] (__netif_receive_skb_one_core+0x64/0x88) [] (__netif_receive_skb_one_core) from [] (__netif_receive_skb+0x38/0x94) [] (__netif_receive_skb) from [] (netif_receive_skb_internal+0x64/0xf8) [] (netif_receive_skb_internal) from [] (netif_receive_skb+0x34/0x19c) [] (netif_receive_skb) from [] (can_rx_offload_napi_poll+0x58/0xb4) Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") Signed-off-by: Oleksij Rempel Link: http://lore.kernel.org/r/20200124132656.22156-1-o.rempel@pengutronix.de Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 900b9f4e0605..fc61cf4eff1c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) */ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) { - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *nskb; - if (likely(nskb)) { - can_skb_set_owner(nskb, skb->sk); - consume_skb(skb); - return nskb; - } else { - kfree_skb(skb); - return NULL; - } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + kfree_skb(skb); + return NULL; } - /* we can assume to have an unshared skb with proper owner */ - return skb; + can_skb_set_owner(nskb, skb->sk); + consume_skb(skb); + return nskb; } #endif /* !_CAN_SKB_H */ -- cgit v1.2.3 From 2e4ef10f58502323ea470bc30ba84d5ddd4e77f0 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 1 Nov 2020 13:17:07 +0000 Subject: net: add GSO UDP L4 and GSO fraglists to the list of software-backed types Commit e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") and commit 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") made UDP L4 and fraglisted GRO/GSO fully supported by the software fallback mode. We can safely add them to NETIF_F_GSO_SOFTWARE to allow logical/virtual netdevs to forward these types of skbs up to the real drivers. Signed-off-by: Alexander Lobakin Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/netdev_features.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 0b17c4322b09..934de56644e7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -207,8 +207,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_FSO) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ - NETIF_F_GSO_SCTP) +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ + NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) /* * If one device supports one of these features, then enable them -- cgit v1.2.3 From 179dfb954790410f65605f1c479c029c2cd73c55 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 26 Oct 2020 11:44:21 +0100 Subject: USB: serial: remove write wait queue The digi_acceleport driver is the only driver still using the port write wake queue so move it to that driver's port data. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 8e67eff9039f..1c09b922f8b0 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -62,7 +62,6 @@ * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this * port. * @flags: usb serial port flags - * @write_wait: a wait_queue_head_t used by the port. * @work: work queue entry for the line discipline waking up. * @dev: pointer to the serial device * @@ -108,7 +107,6 @@ struct usb_serial_port { int tx_bytes; unsigned long flags; - wait_queue_head_t write_wait; struct work_struct work; unsigned long sysrq; /* sysrq timeout */ struct device dev; -- cgit v1.2.3 From 2374a045263b47f763571ec87ad7c65ea505188a Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 29 Oct 2020 12:32:18 +0100 Subject: vt: keyboard, remove unneeded func_* declarations Now that we removed the ugly handling of func_buf, remove unneeded declarations of func_* variables. The definitions are in the generated defkeymap.c_shipped, so we cannot really remove them (but we would love to). Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20201029113222.32640-13-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/kbd_kern.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index bb2246c8ec13..82f29aa35062 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -9,9 +9,6 @@ extern struct tasklet_struct keyboard_tasklet; extern char *func_table[MAX_NR_FUNC]; -extern char func_buf[]; -extern char *funcbufptr; -extern int funcbufsize, funcbufleft; /* * kbd->xxx contains the VC-local things (flag settings etc..) -- cgit v1.2.3 From 763e4cdc0f6d5cea45c896fef67f7be4bdefcca7 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 29 Oct 2020 14:30:48 -0700 Subject: iomap: support partial page discard on writeback block mapping failure iomap writeback mapping failure only calls into ->discard_page() if the current page has not been added to the ioend. Accordingly, the XFS callback assumes a full page discard and invalidation. This is problematic for sub-page block size filesystems where some portion of a page might have been mapped successfully before a failure to map a delalloc block occurs. ->discard_page() is not called in that error scenario and the bio is explicitly failed by iomap via the error return from ->prepare_ioend(). As a result, the filesystem leaks delalloc blocks and corrupts the filesystem block counters. Since XFS is the only user of ->discard_page(), tweak the semantics to invoke the callback unconditionally on mapping errors and provide the file offset that failed to map. Update xfs_discard_page() to discard the corresponding portion of the file and pass the range along to iomap_invalidatepage(). The latter already properly handles both full and sub-page scenarios by not changing any iomap or page state on sub-page invalidations. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 172b3397a1a3..5bd3cac4df9c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -221,7 +221,7 @@ struct iomap_writeback_ops { * Optional, allows the file system to discard state on a page where * we failed to submit any I/O. */ - void (*discard_page)(struct page *page); + void (*discard_page)(struct page *page, loff_t fileoff); }; struct iomap_writepage_ctx { -- cgit v1.2.3 From fdaf083cdfb556a45c422c8998268baf1ab26829 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Oct 2020 09:37:30 -0600 Subject: io_uring: properly handle SQPOLL request cancelations Track if a given task io_uring context contains SQPOLL instances, so we can iterate those for cancelation (and request counts). This ensures that we properly wait on SQPOLL contexts, and find everything that needs canceling. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 868364cea3b7..35b2d845704d 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -30,7 +30,8 @@ struct io_uring_task { struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; - bool in_idle; + atomic_t in_idle; + bool sqpoll; }; #if defined(CONFIG_IO_URING) -- cgit v1.2.3 From b6be002bcd1dd1dedb926abf3c90c794eacb77dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Nov 2020 12:53:16 -0800 Subject: x86/entry: Move nmi entry/exit into common code Lockdep state handling on NMI enter and exit is nothing specific to X86. It's not any different on other architectures. Also the extra state type is not necessary, irqentry_state_t can carry the necessary information as well. Move it to common code and extend irqentry_state_t to carry lockdep state. [ Ira: Make exit_rcu and lockdep a union as they are mutually exclusive between the IRQ and NMI exceptions, and add kernel documentation for struct irqentry_state_t ] Signed-off-by: Thomas Gleixner Signed-off-by: Ira Weiny Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201102205320.1458656-7-ira.weiny@intel.com --- include/linux/entry-common.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b9711e813ec2..1a128baf3628 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -346,8 +346,26 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs); void irqentry_exit_to_user_mode(struct pt_regs *regs); #ifndef irqentry_state +/** + * struct irqentry_state - Opaque object for exception state storage + * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the + * exit path has to invoke rcu_irq_exit(). + * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that + * lockdep state is restored correctly on exit from nmi. + * + * This opaque object is filled in by the irqentry_*_enter() functions and + * must be passed back into the corresponding irqentry_*_exit() functions + * when the exception is complete. + * + * Callers of irqentry_*_[enter|exit]() must consider this structure opaque + * and all members private. Descriptions of the members are provided to aid in + * the maintenance of the irqentry_*() functions. + */ typedef struct irqentry_state { - bool exit_rcu; + union { + bool exit_rcu; + bool lockdep; + }; } irqentry_state_t; #endif @@ -407,4 +425,23 @@ void irqentry_exit_cond_resched(void); */ void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); +/** + * irqentry_nmi_enter - Handle NMI entry + * @regs: Pointer to currents pt_regs + * + * Similar to irqentry_enter() but taking care of the NMI constraints. + */ +irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); + +/** + * irqentry_nmi_exit - Handle return from NMI handling + * @regs: Pointer to pt_regs (NMI entry regs) + * @irq_state: Return value from matching call to irqentry_nmi_enter() + * + * Last action before returning to the low level assmenbly code. + * + * Counterpart to irqentry_nmi_enter(). + */ +void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); + #endif -- cgit v1.2.3 From e1ac4b2406d94eddce8ac2c5ab4235f6075a9602 Mon Sep 17 00:00:00 2001 From: Chester Lin Date: Fri, 30 Oct 2020 14:08:38 +0800 Subject: efi: generalize efi_get_secureboot Generalize the efi_get_secureboot() function so not only efistub but also other subsystems can use it. Note that the MokSbState handling is not factored out: the variable is boot time only, and so it cannot be parameterized as easily. Also, the IMA code will switch to this version in a future patch, and it does not incorporate the MokSbState exception in the first place. Note that the new efi_get_secureboot_mode() helper treats any failures to read SetupMode as setup mode being disabled. Co-developed-by: Chester Lin Signed-off-by: Chester Lin Acked-by: Mimi Zohar Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d7c0e73af2b9..1cd5d91d8ca1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1089,7 +1089,28 @@ enum efi_secureboot_mode { efi_secureboot_mode_disabled, efi_secureboot_mode_enabled, }; -enum efi_secureboot_mode efi_get_secureboot(void); + +static inline +enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) +{ + u8 secboot, setupmode = 0; + efi_status_t status; + unsigned long size; + + size = sizeof(secboot); + status = get_var(L"SecureBoot", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, + &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; + if (status != EFI_SUCCESS) + return efi_secureboot_mode_unknown; + + size = sizeof(setupmode); + get_var(L"SetupMode", &EFI_GLOBAL_VARIABLE_GUID, NULL, &size, &setupmode); + if (secboot == 0 || setupmode == 1) + return efi_secureboot_mode_disabled; + return efi_secureboot_mode_enabled; +} #ifdef CONFIG_RESET_ATTACK_MITIGATION void efi_enable_reset_attack_mitigation(void); -- cgit v1.2.3 From 9d1c94a69d70f1b02bdf06b231cd16ad47ef06cd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:25 +0200 Subject: clk: fix a kernel-doc markup clk_get_duty_cycle -> clk_get_scaled_duty_cycle Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/b2336f3f3cdfe6e1a2d3a7a056ab7ccc7a81b945.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Stephen Boyd --- include/linux/clk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 7fd6a1febcf4..5f8d5f4931c0 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -150,7 +150,7 @@ int clk_get_phase(struct clk *clk); int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den); /** - * clk_get_duty_cycle - return the duty cycle ratio of a clock signal + * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal * @clk: clock signal source * @scale: scaling factor to be applied to represent the ratio as an integer * -- cgit v1.2.3 From 2b5b95b1ff3d70a95013a45e3b5b90f1daf42348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 14 Sep 2020 15:09:33 +0200 Subject: mm: introduce vma_set_file function v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the new vma_set_file() function to allow changing vma->vm_file with the necessary refcount dance. v2: add more users of this. v3: add missing EXPORT_SYMBOL, rebase on mmap cleanup, add comments why we drop the reference on two occasions. v4: make it clear that changing an anonymous vma is illegal. Signed-off-by: Christian König Reviewed-by: Daniel Vetter (v2) Reviewed-by: Jason Gunthorpe Link: https://patchwork.freedesktop.org/patch/394773/ --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ef360fe70aaf..2b7ac36c42dd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2719,6 +2719,8 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) } #endif +void vma_set_file(struct vm_area_struct *vma, struct file *file); + #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From a4da45dda6475816f4c8b9e0d512261991ba31e5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 28 Oct 2020 15:51:17 +0100 Subject: pinctrl: Remove hole in pinctrl_gpio_range On 64-bit platforms, pointer size and alignment are 64-bit, hence two 4-byte holes are present before the pins and gc members of the pinctrl_gpio_range structure. Get rid of these holes by moving the pins pointer. This reduces kernel size of an arm64 Rockchip kernel by ca. 512 bytes. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201028145117.1731876-1-geert+renesas@glider.be Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 2aef59df93d7..70b45d28e7a9 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -51,8 +51,8 @@ struct pinctrl_pin_desc { * @id: an ID number for the chip in this range * @base: base offset of the GPIO range * @pin_base: base pin number of the GPIO range if pins == NULL - * @pins: enumeration of pins in GPIO range or NULL * @npins: number of pins in the GPIO range, including the base number + * @pins: enumeration of pins in GPIO range or NULL * @gc: an optional pointer to a gpio_chip */ struct pinctrl_gpio_range { @@ -61,8 +61,8 @@ struct pinctrl_gpio_range { unsigned int id; unsigned int base; unsigned int pin_base; - unsigned const *pins; unsigned int npins; + unsigned const *pins; struct gpio_chip *gc; }; -- cgit v1.2.3 From e40b0b56ffdc16edce207904a7782da6a1a47162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Nov 2020 17:05:35 +0100 Subject: Revert "mm: introduce vma_set_file function v4" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel test robot is not happy with that. This reverts commit 2b5b95b1ff3d70a95013a45e3b5b90f1daf42348. Signed-off-by: Christian König Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/394773/ --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2b7ac36c42dd..ef360fe70aaf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2719,8 +2719,6 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) } #endif -void vma_set_file(struct vm_area_struct *vma, struct file *file); - #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From 3b20369313a486246582c8ef6ff5d1d0b9c34613 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 5 Nov 2020 15:48:51 +0800 Subject: EDAC: Add three new memory types There are {Low-Power DDR3/4, WIO2} types of memory. Add new entries to 'enum mem_type' and new strings to 'edac_mem_types[]' for the new types. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck --- include/linux/edac.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 15e8f3d8a895..8f63245f7f7c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -175,11 +175,14 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_RDDR3: Registered DDR3 RAM * This is a variant of the DDR3 memories. * @MEM_LRDDR3: Load-Reduced DDR3 memory. + * @MEM_LPDDR3: Low-Power DDR3 memory. * @MEM_DDR4: Unbuffered DDR4 RAM * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. + * @MEM_LPDDR4: Low-Power DDR4 memory. * @MEM_NVDIMM: Non-volatile RAM + * @MEM_WIO2: Wide I/O 2. */ enum mem_type { MEM_EMPTY = 0, @@ -200,10 +203,13 @@ enum mem_type { MEM_DDR3, MEM_RDDR3, MEM_LRDDR3, + MEM_LPDDR3, MEM_DDR4, MEM_RDDR4, MEM_LRDDR4, + MEM_LPDDR4, MEM_NVDIMM, + MEM_WIO2, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -223,10 +229,13 @@ enum mem_type { #define MEM_FLAG_XDR BIT(MEM_XDR) #define MEM_FLAG_DDR3 BIT(MEM_DDR3) #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) +#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) #define MEM_FLAG_DDR4 BIT(MEM_DDR4) #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) +#define MEM_FLAG_WIO2 BIT(MEM_WIO2) /** * enum edac-type - Error Detection and Correction capabilities and mode -- cgit v1.2.3 From e4b27ebc780fa7fa951d81d241912755532568ff Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 3 Nov 2020 08:10:56 +0100 Subject: net: dsa: Add DSA driver for Hirschmann Hellcreek switches Add a basic DSA driver for Hirschmann Hellcreek switches. Those switches are implementing features needed for Time Sensitive Networking (TSN) such as support for the Time Precision Protocol and various shapers like the Time Aware Shaper. This driver includes basic support for networking: * VLAN handling * FDB handling * Port statistics * STP * Phylink Signed-off-by: Kurt Kanzenbach Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/platform_data/hirschmann-hellcreek.h | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/platform_data/hirschmann-hellcreek.h (limited to 'include/linux') diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h new file mode 100644 index 000000000000..388846766bb2 --- /dev/null +++ b/include/linux/platform_data/hirschmann-hellcreek.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0 or MIT) */ +/* + * Hirschmann Hellcreek TSN switch platform data. + * + * Copyright (C) 2020 Linutronix GmbH + * Author Kurt Kanzenbach + */ + +#ifndef _HIRSCHMANN_HELLCREEK_H_ +#define _HIRSCHMANN_HELLCREEK_H_ + +#include + +struct hellcreek_platform_data { + int num_ports; /* Amount of switch ports */ + int is_100_mbits; /* Is it configured to 100 or 1000 mbit/s */ + int qbv_support; /* Qbv support on front TSN ports */ + int qbv_on_cpu_port; /* Qbv support on the CPU port */ + int qbu_support; /* Qbu support on front TSN ports */ + u16 module_id; /* Module identificaton */ +}; + +#endif /* _HIRSCHMANN_HELLCREEK_H_ */ -- cgit v1.2.3 From 293e9a3d950dfebc76d9fa6931e6f91ef856b9ab Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sun, 1 Nov 2020 14:50:56 +0200 Subject: net: phy: export phy_error and phy_trigger_machine These functions are currently used by phy_interrupt() to either signal an error condition or to trigger the link state machine. In an attempt to actually support shared PHY IRQs, export these two functions so that the actual PHY drivers can use them. Cc: Alexandru Ardelean Cc: Andre Edich Cc: Antoine Tenart Cc: Baruch Siach Cc: Christophe Leroy Cc: Dan Murphy Cc: Divya Koppera Cc: Florian Fainelli Cc: Hauke Mehrtens Cc: Heiner Kallweit Cc: Jerome Brunet Cc: Kavya Sree Kotagiri Cc: Linus Walleij Cc: Marco Felsch Cc: Marek Vasut Cc: Martin Blumenstingl Cc: Mathias Kresin Cc: Maxim Kochetkov Cc: Michael Walle Cc: Neil Armstrong Cc: Nisar Sayed Cc: Oleksij Rempel Cc: Philippe Schenker Cc: Willy Liu Cc: Yuiko Oshino Signed-off-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index eb3cb1a98b45..566b39f6cd64 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1570,8 +1570,10 @@ void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); +void phy_error(struct phy_device *phydev); void phy_state_machine(struct work_struct *work); void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); +void phy_trigger_machine(struct phy_device *phydev); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From 87de1f058aacc8ce4be94e36a38f77b860914a76 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Sun, 1 Nov 2020 14:51:12 +0200 Subject: net: phy: add genphy_handle_interrupt_no_ack() It seems there are cases where the interrupts are handled by another entity (ie an IRQ controller embedded inside the PHY) and do not need any other interraction from phylib. For this kind of PHYs, like the RTL8366RB, add the genphy_handle_interrupt_no_ack() function which just triggers the link state machine. Signed-off-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 566b39f6cd64..4f158d6352ae 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1510,6 +1510,7 @@ int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_loopback(struct phy_device *phydev, bool enable); int genphy_soft_reset(struct phy_device *phydev); +irqreturn_t genphy_handle_interrupt_no_ack(struct phy_device *phydev); static inline int genphy_config_aneg(struct phy_device *phydev) { -- cgit v1.2.3 From d8c4a22363853e196497b06a8ee9be9773873a14 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 3 Nov 2020 18:23:53 +0100 Subject: bus: mhi: Add mhi_queue_is_full function This function can be used by client driver to determine whether it's possible to queue new elements in a channel ring. Signed-off-by: Loic Poulain Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1604424234-24446-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Jakub Kicinski --- include/linux/mhi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d4841e5a5f45..b9bb7dff32e2 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -737,4 +737,11 @@ int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir, int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir, struct sk_buff *skb, size_t len, enum mhi_flags mflags); +/** + * mhi_queue_is_full - Determine whether queueing new elements is possible + * @mhi_dev: Device associated with the channels + * @dir: DMA direction for the channel + */ +bool mhi_queue_is_full(struct mhi_device *mhi_dev, enum dma_data_direction dir); + #endif /* _MHI_H_ */ -- cgit v1.2.3 From c1aedf015ebdd0232757a66e2daccf1246bd609c Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 4 Nov 2020 10:19:22 +0800 Subject: net/usb/r8153_ecm: support ECM mode for RTL8153 Support ECM mode based on cdc_ether with relative mii functions, when CONFIG_USB_RTL8152 is not set, or the device is not supported by r8152 driver. Both r8152 and r8153_ecm would check the return value of rtl8152_get_version() in porbe(). If rtl8152_get_version() return none zero value, the r8152 is used for the device with vendor mode. Otherwise, the r8153_ecm is used for the device with ECM mode. Signed-off-by: Hayes Wang Link: https://lore.kernel.org/r/1394712342-15778-392-Taiwan-albertk@realtek.com Signed-off-by: Jakub Kicinski --- include/linux/usb/r8152.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/usb/r8152.h (limited to 'include/linux') diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h new file mode 100644 index 000000000000..20d88b1defc3 --- /dev/null +++ b/include/linux/usb/r8152.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 Realtek Semiconductor Corp. All rights reserved. + */ + +#ifndef __LINUX_R8152_H +#define __LINUX_R8152_H + +#define RTL8152_REQT_READ 0xc0 +#define RTL8152_REQT_WRITE 0x40 +#define RTL8152_REQ_GET_REGS 0x05 +#define RTL8152_REQ_SET_REGS 0x05 + +#define BYTE_EN_DWORD 0xff +#define BYTE_EN_WORD 0x33 +#define BYTE_EN_BYTE 0x11 +#define BYTE_EN_SIX_BYTES 0x3f +#define BYTE_EN_START_MASK 0x0f +#define BYTE_EN_END_MASK 0xf0 + +#define MCU_TYPE_PLA 0x0100 +#define MCU_TYPE_USB 0x0000 + +/* Define these values to match your device */ +#define VENDOR_ID_REALTEK 0x0bda +#define VENDOR_ID_MICROSOFT 0x045e +#define VENDOR_ID_SAMSUNG 0x04e8 +#define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_LINKSYS 0x13b1 +#define VENDOR_ID_NVIDIA 0x0955 +#define VENDOR_ID_TPLINK 0x2357 + +#if IS_REACHABLE(CONFIG_USB_RTL8152) +extern u8 rtl8152_get_version(struct usb_interface *intf); +#endif + +#endif /* __LINUX_R8152_H */ -- cgit v1.2.3 From 8280c07e0762ba753876c427584a792e86f3f7e7 Mon Sep 17 00:00:00 2001 From: Kurt Lee Date: Mon, 12 Oct 2020 03:43:46 -0500 Subject: ieee80211: Add definition for WFA DPP Add Wi-Fi Alliance definition for DPP (Device Provisioning Protocol). Signed-off-by: Kurt Lee Signed-off-by: Wright Feng Link: https://lore.kernel.org/r/20201012084347.121557-2-wright.feng@cypress.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 770408b2fdaf..5e8cc9c3d45a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3417,6 +3417,8 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) #define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) +#define WLAN_AKM_SUITE_WFA_DPP SUITE(WLAN_OUI_WFA, 2) + #define WLAN_MAX_KEY_LEN 32 #define WLAN_PMK_NAME_LEN 16 @@ -3427,6 +3429,7 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 +#define WLAN_OUI_TYPE_WFA_DPP 0x1A #define WLAN_OUI_MICROSOFT 0x0050f2 #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 #define WLAN_OUI_TYPE_MICROSOFT_WMM 2 -- cgit v1.2.3 From aec51036a166a0aecc26cba101bfbbdc53d64139 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 4 Nov 2020 19:35:17 +0000 Subject: tty: tty_io: Move 'tty_sysctl_init's prototype to shared space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/tty/tty_ldisc.c:883:6: warning: no previous prototype for ‘tty_sysctl_init’ [-Wmissing-prototypes] 883 | void tty_sysctl_init(void) | ^~~~~~~~~~~~~~~ Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Nick Holloway Cc: -- Cc: Marko Kohtala Cc: Bill Hawes Cc: "C. Scott Ananian" Cc: Russell King Cc: Andrew Morton Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20201104193549.4026187-5-lee.jones@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index a99e9b8e4e31..10212c6e4345 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -716,6 +716,7 @@ extern int __must_check tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); +extern void tty_sysctl_init(void); /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); -- cgit v1.2.3 From 0264c8c9e1b53e9dbb41fae5e54756e84644bc60 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:36 -0500 Subject: ftrace: Move the recursion testing into global headers Currently, if a callback is registered to a ftrace function and its ftrace_ops does not have the RECURSION flag set, it is encapsulated in a helper function that does the recursion for it. Really, all the callbacks should have their own recursion protection for performance reasons. But they should not all implement their own. Move the recursion helpers to global headers, so that all callbacks can use them. Link: https://lkml.kernel.org/r/20201028115612.460535535@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.166456258@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 1 + include/linux/trace_recursion.h | 187 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 include/linux/trace_recursion.h (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1bd3a0356ae4..0e4164a7f56d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -7,6 +7,7 @@ #ifndef _LINUX_FTRACE_H #define _LINUX_FTRACE_H +#include #include #include #include diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h new file mode 100644 index 000000000000..dbb7b6d4c94c --- /dev/null +++ b/include/linux/trace_recursion.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TRACE_RECURSION_H +#define _LINUX_TRACE_RECURSION_H + +#include +#include + +#ifdef CONFIG_TRACING + +/* Only current can touch trace_recursion */ + +/* + * For function tracing recursion: + * The order of these bits are important. + * + * When function tracing occurs, the following steps are made: + * If arch does not support a ftrace feature: + * call internal function (uses INTERNAL bits) which calls... + * If callback is registered to the "global" list, the list + * function is called and recursion checks the GLOBAL bits. + * then this function calls... + * The function callback, which can use the FTRACE bits to + * check for recursion. + * + * Now if the arch does not support a feature, and it calls + * the global list function which calls the ftrace callback + * all three of these steps will do a recursion protection. + * There's no reason to do one if the previous caller already + * did. The recursion that we are protecting against will + * go through the same steps again. + * + * To prevent the multiple recursion checks, if a recursion + * bit is set that is higher than the MAX bit of the current + * check, then we know that the check was made by the previous + * caller, and we can skip the current check. + */ +enum { + /* Function recursion bits */ + TRACE_FTRACE_BIT, + TRACE_FTRACE_NMI_BIT, + TRACE_FTRACE_IRQ_BIT, + TRACE_FTRACE_SIRQ_BIT, + + /* INTERNAL_BITs must be greater than FTRACE_BITs */ + TRACE_INTERNAL_BIT, + TRACE_INTERNAL_NMI_BIT, + TRACE_INTERNAL_IRQ_BIT, + TRACE_INTERNAL_SIRQ_BIT, + + TRACE_BRANCH_BIT, +/* + * Abuse of the trace_recursion. + * As we need a way to maintain state if we are tracing the function + * graph in irq because we want to trace a particular function that + * was called in irq context but we have irq tracing off. Since this + * can only be modified by current, we can reuse trace_recursion. + */ + TRACE_IRQ_BIT, + + /* Set if the function is in the set_graph_function file */ + TRACE_GRAPH_BIT, + + /* + * In the very unlikely case that an interrupt came in + * at a start of graph tracing, and we want to trace + * the function in that interrupt, the depth can be greater + * than zero, because of the preempted start of a previous + * trace. In an even more unlikely case, depth could be 2 + * if a softirq interrupted the start of graph tracing, + * followed by an interrupt preempting a start of graph + * tracing in the softirq, and depth can even be 3 + * if an NMI came in at the start of an interrupt function + * that preempted a softirq start of a function that + * preempted normal context!!!! Luckily, it can't be + * greater than 3, so the next two bits are a mask + * of what the depth is when we set TRACE_GRAPH_BIT + */ + + TRACE_GRAPH_DEPTH_START_BIT, + TRACE_GRAPH_DEPTH_END_BIT, + + /* + * To implement set_graph_notrace, if this bit is set, we ignore + * function graph tracing of called functions, until the return + * function is called to clear it. + */ + TRACE_GRAPH_NOTRACE_BIT, + + /* + * When transitioning between context, the preempt_count() may + * not be correct. Allow for a single recursion to cover this case. + */ + TRACE_TRANSITION_BIT, +}; + +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) + +#define trace_recursion_depth() \ + (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) +#define trace_recursion_set_depth(depth) \ + do { \ + current->trace_recursion &= \ + ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ + current->trace_recursion |= \ + ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ + } while (0) + +#define TRACE_CONTEXT_BITS 4 + +#define TRACE_FTRACE_START TRACE_FTRACE_BIT +#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_LIST_START TRACE_INTERNAL_BIT +#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_CONTEXT_MASK TRACE_LIST_MAX + +static __always_inline int trace_get_context_bit(void) +{ + int bit; + + if (in_interrupt()) { + if (in_nmi()) + bit = 0; + + else if (in_irq()) + bit = 1; + else + bit = 2; + } else + bit = 3; + + return bit; +} + +static __always_inline int trace_test_and_set_recursion(int start, int max) +{ + unsigned int val = current->trace_recursion; + int bit; + + /* A previous recursion check was made */ + if ((val & TRACE_CONTEXT_MASK) > max) + return 0; + + bit = trace_get_context_bit() + start; + if (unlikely(val & (1 << bit))) { + /* + * It could be that preempt_count has not been updated during + * a switch between contexts. Allow for a single recursion. + */ + bit = TRACE_TRANSITION_BIT; + if (trace_recursion_test(bit)) + return -1; + trace_recursion_set(bit); + barrier(); + return bit + 1; + } + + /* Normal check passed, clear the transition to allow it again */ + trace_recursion_clear(TRACE_TRANSITION_BIT); + + val |= 1 << bit; + current->trace_recursion = val; + barrier(); + + return bit + 1; +} + +static __always_inline void trace_clear_recursion(int bit) +{ + unsigned int val = current->trace_recursion; + + if (!bit) + return; + + bit--; + bit = 1 << bit; + val &= ~bit; + + barrier(); + current->trace_recursion = val; +} + +#endif /* CONFIG_TRACING */ +#endif /* _LINUX_TRACE_RECURSION_H */ -- cgit v1.2.3 From 6e4eb9cb22fc8a893cb708ed42644de5ee7c3827 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:37 -0500 Subject: ftrace: Add ftrace_test_recursion_trylock() helper function To make it easier for ftrace callbacks to have recursion protection, provide a ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() helper that tests for recursion. Link: https://lkml.kernel.org/r/20201028115612.634927593@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.378584067@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index dbb7b6d4c94c..f2a949dbfec7 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -183,5 +183,30 @@ static __always_inline void trace_clear_recursion(int bit) current->trace_recursion = val; } +/** + * ftrace_test_recursion_trylock - tests for recursion in same context + * + * Use this for ftrace callbacks. This will detect if the function + * tracing recursed in the same context (normal vs interrupt), + * + * Returns: -1 if a recursion happened. + * >= 0 if no recursion + */ +static __always_inline int ftrace_test_recursion_trylock(void) +{ + return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); +} + +/** + * ftrace_test_recursion_unlock - called when function callback is complete + * @bit: The return of a successful ftrace_test_recursion_trylock() + * + * This is used at the end of a ftrace callback. + */ +static __always_inline void ftrace_test_recursion_unlock(int bit) +{ + trace_clear_recursion(bit); +} + #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_RECURSION_H */ -- cgit v1.2.3 From da5afbeb1724609996ca7bb4fbce2cd104c95914 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:38 -0500 Subject: ftrace: Optimize testing what context current is in The preempt_count() is not a simple location in memory, it could be part of per_cpu code or more. Each access to preempt_count(), or one of its accessor functions (like in_interrupt()) takes several cycles. By reading preempt_count() once, and then doing tests to find the context against the value return is slightly faster than using in_nmi() and in_interrupt(). Link: https://lkml.kernel.org/r/20201028115612.780796355@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.558881845@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index f2a949dbfec7..ac3d73484cb2 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -117,22 +117,29 @@ enum { #define TRACE_CONTEXT_MASK TRACE_LIST_MAX +/* + * Used for setting context + * NMI = 0 + * IRQ = 1 + * SOFTIRQ = 2 + * NORMAL = 3 + */ +enum { + TRACE_CTX_NMI, + TRACE_CTX_IRQ, + TRACE_CTX_SOFTIRQ, + TRACE_CTX_NORMAL, +}; + static __always_inline int trace_get_context_bit(void) { - int bit; - - if (in_interrupt()) { - if (in_nmi()) - bit = 0; - - else if (in_irq()) - bit = 1; - else - bit = 2; - } else - bit = 3; + unsigned long pc = preempt_count(); - return bit; + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) + return TRACE_CTX_NORMAL; + else + return pc & NMI_MASK ? TRACE_CTX_NMI : + pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; } static __always_inline int trace_test_and_set_recursion(int start, int max) -- cgit v1.2.3 From a25d036d939a30623ff73ecad9c8b9116b02e823 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:45 -0500 Subject: ftrace: Reverse what the RECURSION flag means in the ftrace_ops Now that all callbacks are recursion safe, reverse the meaning of the RECURSION flag and rename it from RECURSION_SAFE to simply RECURSION. Now only callbacks that request to have recursion protecting it will have the added trampoline to do so. Also remove the outdated comment about "PER_CPU" when determining to use the ftrace_ops_assist_func. Link: https://lkml.kernel.org/r/20201028115613.742454631@goodmis.org Link: https://lkml.kernel.org/r/20201106023547.904270143@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Jonathan Corbet Cc: Sebastian Andrzej Siewior Cc: Miroslav Benes Cc: Kamalesh Babulal Cc: Petr Mladek Cc: linux-doc@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0e4164a7f56d..806196345c3f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -98,7 +98,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are * set in the flags member. - * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and + * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION, STUB and * IPMODIFY are a kind of attribute flags which can be set only before * registering the ftrace_ops, and can not be modified while registered. * Changing those attribute flags after registering ftrace_ops will @@ -121,10 +121,10 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * passing regs to the handler. * Note, if this flag is set, the SAVE_REGS flag will automatically * get set upon registering the ftrace_ops, if the arch supports it. - * RECURSION_SAFE - The ftrace_ops can set this to tell the ftrace infrastructure - * that the call back has its own recursion protection. If it does - * not set this, then the ftrace infrastructure will add recursion - * protection for the caller. + * RECURSION - The ftrace_ops can set this to tell the ftrace infrastructure + * that the call back needs recursion protection. If it does + * not set this, then the ftrace infrastructure will assume + * that the callback can handle recursion on its own. * STUB - The ftrace_ops is just a place holder. * INITIALIZED - The ftrace_ops has already been initialized (first use time * register_ftrace_function() is called, it will initialized the ops) @@ -156,7 +156,7 @@ enum { FTRACE_OPS_FL_DYNAMIC = BIT(1), FTRACE_OPS_FL_SAVE_REGS = BIT(2), FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = BIT(3), - FTRACE_OPS_FL_RECURSION_SAFE = BIT(4), + FTRACE_OPS_FL_RECURSION = BIT(4), FTRACE_OPS_FL_STUB = BIT(5), FTRACE_OPS_FL_INITIALIZED = BIT(6), FTRACE_OPS_FL_DELETED = BIT(7), -- cgit v1.2.3 From 773c16705058e9be7b0f4ce124e89cd231c120a2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:46 -0500 Subject: ftrace: Add recording of functions that caused recursion This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file "recursed_functions" all the functions that caused recursion while a callback to the function tracer was running. Link: https://lkml.kernel.org/r/20201106023548.102375687@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Guo Ren Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Thomas Gleixner Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Joe Lawrence Cc: Kamalesh Babulal Cc: Mauro Carvalho Chehab Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-csky@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: live-patching@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index ac3d73484cb2..228cc56ed66e 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -91,6 +91,9 @@ enum { * not be correct. Allow for a single recursion to cover this case. */ TRACE_TRANSITION_BIT, + + /* Used to prevent recursion recording from recursing. */ + TRACE_RECORD_RECURSION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -142,7 +145,22 @@ static __always_inline int trace_get_context_bit(void) pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; } -static __always_inline int trace_test_and_set_recursion(int start, int max) +#ifdef CONFIG_FTRACE_RECORD_RECURSION +extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); +# define do_ftrace_record_recursion(ip, pip) \ + do { \ + if (!trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \ + trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \ + ftrace_record_recursion(ip, pip); \ + trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \ + } \ + } while (0) +#else +# define do_ftrace_record_recursion(ip, pip) do { } while (0) +#endif + +static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, + int start, int max) { unsigned int val = current->trace_recursion; int bit; @@ -158,8 +176,10 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) * a switch between contexts. Allow for a single recursion. */ bit = TRACE_TRANSITION_BIT; - if (trace_recursion_test(bit)) + if (trace_recursion_test(bit)) { + do_ftrace_record_recursion(ip, pip); return -1; + } trace_recursion_set(bit); barrier(); return bit + 1; @@ -199,9 +219,10 @@ static __always_inline void trace_clear_recursion(int bit) * Returns: -1 if a recursion happened. * >= 0 if no recursion */ -static __always_inline int ftrace_test_recursion_trylock(void) +static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, + unsigned long parent_ip) { - return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); + return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX); } /** -- cgit v1.2.3 From 88b8138b240b43d5215bf7cb422692cd8db51f6f Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 6 Nov 2020 14:03:31 +0100 Subject: tty: serial: remove pnx8xxx uart driver Commit 625326ea9c84 ("MIPS: Remove PNX833x alias NXP_STB22x") removed support for PNX833x, so it's time to remove serial driver, too. Signed-off-by: Thomas Bogendoerfer Link: https://lore.kernel.org/r/20201106130332.103476-1-tsbogend@alpha.franken.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_pnx8xxx.h | 67 ------------------------------------------ 1 file changed, 67 deletions(-) delete mode 100644 include/linux/serial_pnx8xxx.h (limited to 'include/linux') diff --git a/include/linux/serial_pnx8xxx.h b/include/linux/serial_pnx8xxx.h deleted file mode 100644 index 619d748dcd44..000000000000 --- a/include/linux/serial_pnx8xxx.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Embedded Alley Solutions, source@embeddedalley.com. - */ - -#ifndef _LINUX_SERIAL_PNX8XXX_H -#define _LINUX_SERIAL_PNX8XXX_H - -#include - -#define PNX8XXX_NR_PORTS 2 - -struct pnx8xxx_port { - struct uart_port port; - struct timer_list timer; - unsigned int old_status; -}; - -/* register offsets */ -#define PNX8XXX_LCR 0 -#define PNX8XXX_MCR 0x004 -#define PNX8XXX_BAUD 0x008 -#define PNX8XXX_CFG 0x00c -#define PNX8XXX_FIFO 0x028 -#define PNX8XXX_ISTAT 0xfe0 -#define PNX8XXX_IEN 0xfe4 -#define PNX8XXX_ICLR 0xfe8 -#define PNX8XXX_ISET 0xfec -#define PNX8XXX_PD 0xff4 -#define PNX8XXX_MID 0xffc - -#define PNX8XXX_UART_LCR_TXBREAK (1<<30) -#define PNX8XXX_UART_LCR_PAREVN 0x10000000 -#define PNX8XXX_UART_LCR_PAREN 0x08000000 -#define PNX8XXX_UART_LCR_2STOPB 0x04000000 -#define PNX8XXX_UART_LCR_8BIT 0x01000000 -#define PNX8XXX_UART_LCR_TX_RST 0x00040000 -#define PNX8XXX_UART_LCR_RX_RST 0x00020000 -#define PNX8XXX_UART_LCR_RX_NEXT 0x00010000 - -#define PNX8XXX_UART_MCR_SCR 0xFF000000 -#define PNX8XXX_UART_MCR_DCD 0x00800000 -#define PNX8XXX_UART_MCR_CTS 0x00100000 -#define PNX8XXX_UART_MCR_LOOP 0x00000010 -#define PNX8XXX_UART_MCR_RTS 0x00000002 -#define PNX8XXX_UART_MCR_DTR 0x00000001 - -#define PNX8XXX_UART_INT_TX 0x00000080 -#define PNX8XXX_UART_INT_EMPTY 0x00000040 -#define PNX8XXX_UART_INT_RCVTO 0x00000020 -#define PNX8XXX_UART_INT_RX 0x00000010 -#define PNX8XXX_UART_INT_RXOVRN 0x00000008 -#define PNX8XXX_UART_INT_FRERR 0x00000004 -#define PNX8XXX_UART_INT_BREAK 0x00000002 -#define PNX8XXX_UART_INT_PARITY 0x00000001 -#define PNX8XXX_UART_INT_ALLRX 0x0000003F -#define PNX8XXX_UART_INT_ALLTX 0x000000C0 - -#define PNX8XXX_UART_FIFO_TXFIFO 0x001F0000 -#define PNX8XXX_UART_FIFO_TXFIFO_STA (0x1f<<16) -#define PNX8XXX_UART_FIFO_RXBRK 0x00008000 -#define PNX8XXX_UART_FIFO_RXFE 0x00004000 -#define PNX8XXX_UART_FIFO_RXPAR 0x00002000 -#define PNX8XXX_UART_FIFO_RXFIFO 0x00001F00 -#define PNX8XXX_UART_FIFO_RBRTHR 0x000000FF - -#endif -- cgit v1.2.3 From 4cf1bc1f10452065a29d576fc5693fc4fab5b919 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:40 +0000 Subject: bpf: Implement task local storage Similar to bpf_local_storage for sockets and inodes add local storage for task_struct. The life-cycle of storage is managed with the life-cycle of the task_struct. i.e. the storage is destroyed along with the owning task with a callback to the bpf_task_storage_free from the task_free LSM hook. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. The userspace map operations can be done by using a pid fd as a key passed to the lookup, update and delete operations. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 23 +++++++++++++++++++++++ include/linux/bpf_types.h | 1 + 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aaacb6aafc87..73226181b744 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -7,6 +7,7 @@ #ifndef _LINUX_BPF_LSM_H #define _LINUX_BPF_LSM_H +#include #include #include @@ -35,9 +36,21 @@ static inline struct bpf_storage_blob *bpf_inode( return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + if (unlikely(!task->security)) + return NULL; + + return task->security + bpf_lsm_blob_sizes.lbs_task; +} + extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); +void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ @@ -53,10 +66,20 @@ static inline struct bpf_storage_blob *bpf_inode( return NULL; } +static inline struct bpf_storage_blob *bpf_task( + const struct task_struct *task) +{ + return NULL; +} + static inline void bpf_inode_storage_free(struct inode *inode) { } +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2e6f568377f1..99f7fd657d87 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) -- cgit v1.2.3 From 3ca1032ab7ab010eccb107aa515598788f7d93bb Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 6 Nov 2020 10:37:43 +0000 Subject: bpf: Implement get_current_task_btf and RET_PTR_TO_BTF_ID The currently available bpf_get_current_task returns an unsigned integer which can be used along with BPF_CORE_READ to read data from the task_struct but still cannot be used as an input argument to a helper that accepts an ARG_PTR_TO_BTF_ID of type task_struct. In order to implement this helper a new return type, RET_PTR_TO_BTF_ID, is added. This is similar to RET_PTR_TO_BTF_ID_OR_NULL but does not require checking the nullness of returned pointer. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201106103747.2780972-6-kpsingh@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2fffd30e13ac..73d5381a5d5c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -310,6 +310,7 @@ enum bpf_return_type { RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ + RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs -- cgit v1.2.3 From d4d50710a8b46082224376ef119a4dbb75b25c56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Nov 2020 09:27:33 +0100 Subject: seq_file: add seq_read_iter iov_iter based variant for reading a seq_file. seq_read is reimplemented on top of the iter variant. Signed-off-by: Christoph Hellwig Tested-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 813614d4b71f..b83b3ae3c877 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -107,6 +107,7 @@ void seq_pad(struct seq_file *m, char c); char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); +ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_write(struct seq_file *seq, const void *data, size_t len); -- cgit v1.2.3 From b819fd9da38508e0504624b87d9983fcc4237f3c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:14 +0100 Subject: highmem: Remove unused functions Nothing uses totalhigh_pages_dec() and totalhigh_pages_set(). Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095856.732891880@linutronix.de --- include/linux/highmem.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 14e6202ce47f..f5c31338f0a3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -104,21 +104,11 @@ static inline void totalhigh_pages_inc(void) atomic_long_inc(&_totalhigh_pages); } -static inline void totalhigh_pages_dec(void) -{ - atomic_long_dec(&_totalhigh_pages); -} - static inline void totalhigh_pages_add(long count) { atomic_long_add(count, &_totalhigh_pages); } -static inline void totalhigh_pages_set(long val) -{ - atomic_long_set(&_totalhigh_pages, val); -} - void kmap_flush_unused(void); struct page *kmap_to_page(void *addr); -- cgit v1.2.3 From 298fa1ad5571f59cb3ca5497a9455f36867f065e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:18 +0100 Subject: highmem: Provide generic variant of kmap_atomic* The kmap_atomic* interfaces in all architectures are pretty much the same except for post map operations (flush) and pre- and post unmap operations. Provide a generic variant for that. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095857.175939340@linutronix.de --- include/linux/highmem.h | 82 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index f5c31338f0a3..f5ecee9c2576 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -31,9 +31,16 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) #include +/* + * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. + */ +#ifdef CONFIG_KMAP_LOCAL +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); +void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void kunmap_local_indexed(void *vaddr); +#endif + #ifdef CONFIG_HIGHMEM -extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); -extern void kunmap_atomic_high(void *kvaddr); #include #ifndef ARCH_HAS_KMAP_FLUSH_TLB @@ -81,6 +88,11 @@ static inline void kunmap(struct page *page) * be used in IRQ contexts, so in some (very limited) cases we need * it. */ + +#ifndef CONFIG_KMAP_LOCAL +void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); +void kunmap_atomic_high(void *kvaddr); + static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); @@ -89,7 +101,38 @@ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) return page_address(page); return kmap_atomic_high_prot(page, prot); } -#define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot) + +static inline void __kunmap_atomic(void *vaddr) +{ + kunmap_atomic_high(vaddr); +} +#else /* !CONFIG_KMAP_LOCAL */ + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_atomic(void *addr) +{ + kunmap_local_indexed(addr); +} + +#endif /* CONFIG_KMAP_LOCAL */ + +static inline void *kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); @@ -147,25 +190,33 @@ static inline void *kmap_atomic(struct page *page) pagefault_disable(); return page_address(page); } -#define kmap_atomic_prot(page, prot) kmap_atomic(page) -static inline void kunmap_atomic_high(void *addr) +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + return kmap_atomic(page); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} + +static inline void __kunmap_atomic(void *addr) { /* * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic() - * handles re-enabling faults + preemption + * handles re-enabling faults and preemption */ #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(addr); #endif } -#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) - #define kmap_flush_unused() do {} while(0) #endif /* CONFIG_HIGHMEM */ +#if !defined(CONFIG_KMAP_LOCAL) #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) DECLARE_PER_CPU(int, __kmap_atomic_idx); @@ -196,22 +247,21 @@ static inline void kmap_atomic_idx_pop(void) __this_cpu_dec(__kmap_atomic_idx); #endif } - +#endif #endif /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() * kunmap_atomic() should get the return value of kmap_atomic, not the page. */ -#define kunmap_atomic(addr) \ -do { \ - BUILD_BUG_ON(__same_type((addr), struct page *)); \ - kunmap_atomic_high(addr); \ - pagefault_enable(); \ - preempt_enable(); \ +#define kunmap_atomic(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_atomic(__addr); \ + pagefault_enable(); \ + preempt_enable(); \ } while (0) - /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) -- cgit v1.2.3 From 157e118b55113d1e6c7f8ddfcec0a1dbf3a69511 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:20 +0100 Subject: x86/mm/highmem: Use generic kmap atomic implementation Convert X86 to the generic kmap atomic implementation and make the iomap_atomic() naming convention consistent while at it. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201103095857.375127260@linutronix.de --- include/linux/highmem.h | 2 +- include/linux/io-mapping.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index f5ecee9c2576..1222a31be842 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -217,7 +217,7 @@ static inline void __kunmap_atomic(void *addr) #endif /* CONFIG_HIGHMEM */ #if !defined(CONFIG_KMAP_LOCAL) -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) +#if defined(CONFIG_HIGHMEM) DECLARE_PER_CPU(int, __kmap_atomic_idx); diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index c75e4d3d8833..3b0940be72e9 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -69,7 +69,7 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, BUG_ON(offset >= mapping->size); phys_addr = mapping->base + offset; - return iomap_atomic_prot_pfn(PHYS_PFN(phys_addr), mapping->prot); + return iomap_atomic_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); } static inline void -- cgit v1.2.3 From d7029e4549691ecaf1ead536d3322a00bda85659 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:30 +0100 Subject: highmem: Get rid of kmap_types.h The header is not longer used and on alpha, ia64, openrisc, parisc and um it was completely unused anyway as these architectures have no highmem support. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: Andrew Morton Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20201103095858.422094352@linutronix.de --- include/linux/highmem.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 1222a31be842..de78869454b1 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -29,8 +29,6 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) } #endif -#include - /* * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. */ -- cgit v1.2.3 From 3c1016b53c311906878c703af1e2b29855a9a962 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:31 +0100 Subject: mm/highmem: Remove the old kmap_atomic cruft All users gone. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20201103095858.516281567@linutronix.de --- include/linux/highmem.h | 63 ++++--------------------------------------------- 1 file changed, 4 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index de78869454b1..3180a8f7307f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -86,31 +86,16 @@ static inline void kunmap(struct page *page) * be used in IRQ contexts, so in some (very limited) cases we need * it. */ - -#ifndef CONFIG_KMAP_LOCAL -void *kmap_atomic_high_prot(struct page *page, pgprot_t prot); -void kunmap_atomic_high(void *kvaddr); - static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - return kmap_atomic_high_prot(page, prot); -} - -static inline void __kunmap_atomic(void *vaddr) -{ - kunmap_atomic_high(vaddr); + return __kmap_local_page_prot(page, prot); } -#else /* !CONFIG_KMAP_LOCAL */ -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic(struct page *page) { - preempt_disable(); - pagefault_disable(); - return __kmap_local_page_prot(page, prot); + return kmap_atomic_prot(page, kmap_prot); } static inline void *kmap_atomic_pfn(unsigned long pfn) @@ -125,13 +110,6 @@ static inline void __kunmap_atomic(void *addr) kunmap_local_indexed(addr); } -#endif /* CONFIG_KMAP_LOCAL */ - -static inline void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); -} - /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); extern atomic_long_t _totalhigh_pages; @@ -212,41 +190,8 @@ static inline void __kunmap_atomic(void *addr) #define kmap_flush_unused() do {} while(0) -#endif /* CONFIG_HIGHMEM */ - -#if !defined(CONFIG_KMAP_LOCAL) -#if defined(CONFIG_HIGHMEM) - -DECLARE_PER_CPU(int, __kmap_atomic_idx); - -static inline int kmap_atomic_idx_push(void) -{ - int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; - -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx >= KM_TYPE_NR); -#endif - return idx; -} - -static inline int kmap_atomic_idx(void) -{ - return __this_cpu_read(__kmap_atomic_idx) - 1; -} -static inline void kmap_atomic_idx_pop(void) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - int idx = __this_cpu_dec_return(__kmap_atomic_idx); - - BUG_ON(idx < 0); -#else - __this_cpu_dec(__kmap_atomic_idx); -#endif -} -#endif -#endif +#endif /* CONFIG_HIGHMEM */ /* * Prevent people trying to call kunmap_atomic() as if it were kunmap() -- cgit v1.2.3 From 351191ad55c8a1eccaf23e4187c62056229c0779 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:32 +0100 Subject: io-mapping: Cleanup atomic iomap Switch the atomic iomap implementation over to kmap_local and stick the preempt/pagefault mechanics into the generic code similar to the kmap_atomic variants. Rename the x86 map function in preparation for a non-atomic variant. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095858.625310005@linutronix.de --- include/linux/io-mapping.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 3b0940be72e9..60e7c83e4904 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -69,13 +69,17 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, BUG_ON(offset >= mapping->size); phys_addr = mapping->base + offset; - return iomap_atomic_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); + preempt_disable(); + pagefault_disable(); + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); } static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { - iounmap_atomic(vaddr); + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); + preempt_enable(); } static inline void __iomem * -- cgit v1.2.3 From 13f876ba77ebd5125799bb042201f22cf73df154 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Nov 2020 10:27:34 +0100 Subject: highmem: High implementation details and document API Move the gory details of kmap & al into a private header and only document the interfaces which are usable by drivers. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Christoph Hellwig Cc: Andrew Morton Link: https://lore.kernel.org/r/20201103095858.827582066@linutronix.de --- include/linux/highmem-internal.h | 174 +++++++++++++++++++++++++ include/linux/highmem.h | 266 ++++++++++++++------------------------- 2 files changed, 270 insertions(+), 170 deletions(-) create mode 100644 include/linux/highmem-internal.h (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h new file mode 100644 index 000000000000..6ceed907b14e --- /dev/null +++ b/include/linux/highmem-internal.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HIGHMEM_INTERNAL_H +#define _LINUX_HIGHMEM_INTERNAL_H + +/* + * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. + */ +#ifdef CONFIG_KMAP_LOCAL +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); +void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void kunmap_local_indexed(void *vaddr); +#endif + +#ifdef CONFIG_HIGHMEM +#include + +#ifndef ARCH_HAS_KMAP_FLUSH_TLB +static inline void kmap_flush_tlb(unsigned long addr) { } +#endif + +#ifndef kmap_prot +#define kmap_prot PAGE_KERNEL +#endif + +void *kmap_high(struct page *page); +void kunmap_high(struct page *page); +void __kmap_flush_unused(void); +struct page *__kmap_to_page(void *addr); + +static inline void *kmap(struct page *page) +{ + void *addr; + + might_sleep(); + if (!PageHighMem(page)) + addr = page_address(page); + else + addr = kmap_high(page); + kmap_flush_tlb((unsigned long)addr); + return addr; +} + +static inline void kunmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + +static inline struct page *kmap_to_page(void *addr) +{ + return __kmap_to_page(addr); +} + +static inline void kmap_flush_unused(void) +{ + __kmap_flush_unused(); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + preempt_disable(); + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_atomic(void *addr) +{ + kunmap_local_indexed(addr); + pagefault_enable(); + preempt_enable(); +} + +unsigned int __nr_free_highpages(void); +extern atomic_long_t _totalhigh_pages; + +static inline unsigned int nr_free_highpages(void) +{ + return __nr_free_highpages(); +} + +static inline unsigned long totalhigh_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalhigh_pages); +} + +static inline void totalhigh_pages_inc(void) +{ + atomic_long_inc(&_totalhigh_pages); +} + +static inline void totalhigh_pages_add(long count) +{ + atomic_long_add(count, &_totalhigh_pages); +} + +#else /* CONFIG_HIGHMEM */ + +static inline struct page *kmap_to_page(void *addr) +{ + return virt_to_page(addr); +} + +static inline void *kmap(struct page *page) +{ + might_sleep(); + return page_address(page); +} + +static inline void kunmap_high(struct page *page) { } +static inline void kmap_flush_unused(void) { } + +static inline void kunmap(struct page *page) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(page_address(page)); +#endif +} + +static inline void *kmap_atomic(struct page *page) +{ + preempt_disable(); + pagefault_disable(); + return page_address(page); +} + +static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +{ + return kmap_atomic(page); +} + +static inline void *kmap_atomic_pfn(unsigned long pfn) +{ + return kmap_atomic(pfn_to_page(pfn)); +} + +static inline void __kunmap_atomic(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif + pagefault_enable(); + preempt_enable(); +} + +static inline unsigned int nr_free_highpages(void) { return 0; } +static inline unsigned long totalhigh_pages(void) { return 0UL; } + +#endif /* CONFIG_HIGHMEM */ + +/* + * Prevent people trying to call kunmap_atomic() as if it were kunmap() + * kunmap_atomic() should get the return value of kmap_atomic, not the page. + */ +#define kunmap_atomic(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_atomic(__addr); \ +} while (0) + +#endif diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 3180a8f7307f..7d098bd621f6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -11,199 +11,125 @@ #include -#ifndef ARCH_HAS_FLUSH_ANON_PAGE -static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) -{ -} -#endif - -#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ -} -static inline void flush_kernel_vmap_range(void *vaddr, int size) -{ -} -static inline void invalidate_kernel_vmap_range(void *vaddr, int size) -{ -} -#endif +#include "highmem-internal.h" -/* - * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. +/** + * kmap - Map a page for long term usage + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Can only be invoked from preemptible task context because on 32bit + * systems with CONFIG_HIGHMEM enabled this function might sleep. + * + * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area + * this returns the virtual address of the direct kernel mapping. + * + * The returned virtual address is globally visible and valid up to the + * point where it is unmapped via kunmap(). The pointer can be handed to + * other contexts. + * + * For highmem pages on 32bit systems this can be slow as the mapping space + * is limited and protected by a global lock. In case that there is no + * mapping slot available the function blocks until a slot is released via + * kunmap(). */ -#ifdef CONFIG_KMAP_LOCAL -void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void *__kmap_local_page_prot(struct page *page, pgprot_t prot); -void kunmap_local_indexed(void *vaddr); -#endif - -#ifdef CONFIG_HIGHMEM -#include +static inline void *kmap(struct page *page); -#ifndef ARCH_HAS_KMAP_FLUSH_TLB -static inline void kmap_flush_tlb(unsigned long addr) { } -#endif - -#ifndef kmap_prot -#define kmap_prot PAGE_KERNEL -#endif - -void *kmap_high(struct page *page); -static inline void *kmap(struct page *page) -{ - void *addr; - - might_sleep(); - if (!PageHighMem(page)) - addr = page_address(page); - else - addr = kmap_high(page); - kmap_flush_tlb((unsigned long)addr); - return addr; -} +/** + * kunmap - Unmap the virtual address mapped by kmap() + * @addr: Virtual address to be unmapped + * + * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of + * pages in the low memory area. + */ +static inline void kunmap(struct page *page); -void kunmap_high(struct page *page); +/** + * kmap_to_page - Get the page for a kmap'ed address + * @addr: The address to look up + * + * Returns: The page which is mapped to @addr. + */ +static inline struct page *kmap_to_page(void *addr); -static inline void kunmap(struct page *page) -{ - might_sleep(); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} +/** + * kmap_flush_unused - Flush all unused kmap mappings in order to + * remove stray mappings + */ +static inline void kmap_flush_unused(void); -/* - * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because - * no global lock is needed and because the kmap code must perform a global TLB - * invalidation when the kmap pool wraps. +/** + * kmap_atomic - Atomically map a page for temporary usage + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Side effect: On return pagefaults and preemption are disabled. + * + * Can be invoked from any context. * - * However when holding an atomic kmap it is not legal to sleep, so atomic - * kmaps are appropriate for short, tight code paths only. + * Requires careful handling when nesting multiple mappings because the map + * management is stack based. The unmap has to be in the reverse order of + * the map operation: * - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. + * addr1 = kmap_atomic(page1); + * addr2 = kmap_atomic(page2); + * ... + * kunmap_atomic(addr2); + * kunmap_atomic(addr1); + * + * Unmapping addr1 before addr2 is invalid and causes malfunction. + * + * Contrary to kmap() mappings the mapping is only valid in the context of + * the caller and cannot be handed to other contexts. + * + * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the + * virtual address of the direct mapping. Only real highmem pages are + * temporarily mapped. + * + * While it is significantly faster than kmap() it comes with restrictions + * about the pointer validity and the side effects of disabling page faults + * and preemption. Use it only when absolutely necessary, e.g. from non + * preemptible contexts. */ -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) -{ - preempt_disable(); - pagefault_disable(); - return __kmap_local_page_prot(page, prot); -} +static inline void *kmap_atomic(struct page *page); -static inline void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); -} - -static inline void *kmap_atomic_pfn(unsigned long pfn) -{ - preempt_disable(); - pagefault_disable(); - return __kmap_local_pfn_prot(pfn, kmap_prot); -} - -static inline void __kunmap_atomic(void *addr) -{ - kunmap_local_indexed(addr); -} - -/* declarations for linux/mm/highmem.c */ -unsigned int nr_free_highpages(void); -extern atomic_long_t _totalhigh_pages; -static inline unsigned long totalhigh_pages(void) -{ - return (unsigned long)atomic_long_read(&_totalhigh_pages); -} - -static inline void totalhigh_pages_inc(void) -{ - atomic_long_inc(&_totalhigh_pages); -} - -static inline void totalhigh_pages_add(long count) -{ - atomic_long_add(count, &_totalhigh_pages); -} - -void kmap_flush_unused(void); - -struct page *kmap_to_page(void *addr); - -#else /* CONFIG_HIGHMEM */ - -static inline unsigned int nr_free_highpages(void) { return 0; } - -static inline struct page *kmap_to_page(void *addr) -{ - return virt_to_page(addr); -} - -static inline unsigned long totalhigh_pages(void) { return 0UL; } +/** + * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic() + * @addr: Virtual address to be unmapped + * + * Counterpart to kmap_atomic(). + * + * Undoes the side effects of kmap_atomic(), i.e. reenabling pagefaults and + * preemption. + * + * Other than that a NOOP for CONFIG_HIGHMEM=n and for mappings of pages + * in the low memory area. For real highmen pages the mapping which was + * established with kmap_atomic() is destroyed. + */ -static inline void *kmap(struct page *page) -{ - might_sleep(); - return page_address(page); -} +/* Highmem related interfaces for management code */ +static inline unsigned int nr_free_highpages(void); +static inline unsigned long totalhigh_pages(void); -static inline void kunmap_high(struct page *page) +#ifndef ARCH_HAS_FLUSH_ANON_PAGE +static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } - -static inline void kunmap(struct page *page) -{ -#ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(page_address(page)); #endif -} -static inline void *kmap_atomic(struct page *page) +#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +static inline void flush_kernel_dcache_page(struct page *page) { - preempt_disable(); - pagefault_disable(); - return page_address(page); } - -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void flush_kernel_vmap_range(void *vaddr, int size) { - return kmap_atomic(page); } - -static inline void *kmap_atomic_pfn(unsigned long pfn) +static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { - return kmap_atomic(pfn_to_page(pfn)); } - -static inline void __kunmap_atomic(void *addr) -{ - /* - * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic() - * handles re-enabling faults and preemption - */ -#ifdef ARCH_HAS_FLUSH_ON_KUNMAP - kunmap_flush_on_unmap(addr); #endif -} - -#define kmap_flush_unused() do {} while(0) - - -#endif /* CONFIG_HIGHMEM */ - -/* - * Prevent people trying to call kunmap_atomic() as if it were kunmap() - * kunmap_atomic() should get the return value of kmap_atomic, not the page. - */ -#define kunmap_atomic(__addr) \ -do { \ - BUILD_BUG_ON(__same_type((__addr), struct page *)); \ - __kunmap_atomic(__addr); \ - pagefault_enable(); \ - preempt_enable(); \ -} while (0) /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage -- cgit v1.2.3 From 3fcd6a230fa7d03bffcb831a81b40435c146c12b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 3 Sep 2020 15:23:29 -0700 Subject: x86/cpu: Avoid cpuinfo-induced IPIing of idle CPUs Currently, accessing /proc/cpuinfo sends IPIs to idle CPUs in order to learn their clock frequency. Which is a bit strange, given that waking them from idle likely significantly changes their clock frequency. This commit therefore avoids sending /proc/cpuinfo-induced IPIs to idle CPUs. [ paulmck: Also check for idle in arch_freq_prepare_all(). ] Signed-off-by: Paul E. McKenney Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: --- include/linux/rcutiny.h | 2 ++ include/linux/rcutree.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7c1ecdb356d8..2a97334eb786 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -89,6 +89,8 @@ static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_preempt(void) { } static inline void rcu_irq_exit_check_preempt(void) { } +#define rcu_is_idle_cpu(cpu) \ + (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 59eb5cd567d7..df578b73960f 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -50,6 +50,7 @@ void rcu_irq_exit(void); void rcu_irq_exit_preempt(void); void rcu_irq_enter_irqson(void); void rcu_irq_exit_irqson(void); +bool rcu_is_idle_cpu(int cpu); #ifdef CONFIG_PROVE_RCU void rcu_irq_exit_check_preempt(void); -- cgit v1.2.3 From ede7dc7fa0af619afc08995776eadb9ff3b0a711 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:54 -0800 Subject: jbd2: rename j_maxlen to j_total_len and add jbd2_journal_max_txn_bufs The on-disk superblock field sb->s_maxlen represents the total size of the journal including the fast commit area and is no more the max number of blocks available for a transaction. The maximum number of blocks available to a transaction is reduced by the number of fast commit blocks. So, this patch renames j_maxlen to j_total_len to better represent its intent. Also, it adds a function to calculate max number of bufs available for a transaction. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-6-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1d5566af48ac..e0b6b53eae64 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -988,9 +988,9 @@ struct journal_s struct block_device *j_fs_dev; /** - * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_total_len: Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; + unsigned int j_total_len; /** * @j_reserved_credits: @@ -1624,6 +1624,11 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); +static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 4; +} + /* * is_journal_abort * -- cgit v1.2.3 From a1e5e465b31d6015fccb359d99053b39e5180466 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:55 -0800 Subject: ext4: clean up the JBD2 API that initializes fast commits This patch removes jbd2_fc_init() API and its related functions to simplify enabling fast commits. With this change, the number of fast commit blocks to use is solely determined by the JBD2 layer. So, we move the default value for minimum number of fast commit blocks from ext4/fast_commit.h to include/linux/jbd2.h. However, whether or not to use fast commits is determined by the file system. The file system just sets the fast commit feature using jbd2_journal_set_features(). JBD2 layer then determines how many blocks to use for fast commits (based on the value found in the JBD2 superblock). Note that the JBD2 feature flag of fast commits is just an indication that there are fast commit blocks present on disk. It doesn't tell JBD2 layer about the intent of the file system of whether to it wants to use fast commit or not. That's why, we blindly clear the fast commit flag in journal_reset() after the recovery is done. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-7-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e0b6b53eae64..b2caf7bbd8e5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 +#define JBD2_MIN_FC_BLOCKS 256 #ifdef __KERNEL__ @@ -1614,7 +1615,6 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ -int jbd2_fc_init(journal_t *journal, int num_fc_blks); int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); -- cgit v1.2.3 From c460e5edc85a063ec9cb60addff93d00ed378701 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:57 -0800 Subject: jbd2: don't use state lock during commit path Variables journal->j_fc_off, journal->j_fc_wbuf are accessed during commit path. Since today we allow only one process to perform a fast commit, there is no need take state lock before accessing these variables. This patch removes these locks and adds comments to describe this. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-9-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b2caf7bbd8e5..5f0ef6380b0c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -945,8 +945,9 @@ struct journal_s /** * @j_fc_off: * - * Number of fast commit blocks currently allocated. - * [j_state_lock]. + * Number of fast commit blocks currently allocated. Accessed only + * during fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ unsigned long j_fc_off; @@ -1109,8 +1110,9 @@ struct journal_s struct buffer_head **j_wbuf; /** - * @j_fc_wbuf: Array of fast commit bhs for - * jbd2_journal_commit_transaction. + * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only + * during a fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ struct buffer_head **j_fc_wbuf; -- cgit v1.2.3 From 0bce577bf9cae13ae32d391432d0030e3f67fc1d Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:58 -0800 Subject: jbd2: don't pass tid to jbd2_fc_end_commit_fallback() In jbd2_fc_end_commit_fallback(), we know which tid to commit. There's no need for caller to pass it. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-10-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5f0ef6380b0c..1c49fd62ff2e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1619,7 +1619,7 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); -int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); -- cgit v1.2.3 From eda2845ae5e0ae466c1aca715d642b4977311747 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:15 +0200 Subject: irqdomain: Remove unused of_device_id forward declaration There is no users of of_device_id in irqdomain.h. Drop it. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-2-andriy.shevchenko@linux.intel.com --- include/linux/irqdomain.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 71535e87109f..56642188ec21 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -38,7 +38,6 @@ struct device_node; struct irq_domain; -struct of_device_id; struct irq_chip; struct irq_data; struct cpumask; -- cgit v1.2.3 From 08219fb1efae451c83281cb6ba4bb6c35ac88fab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:16 +0200 Subject: irqdomain: Add forward declaration of fwnode_handle irqdomain.h is a user of struct fwnode_handle. Add forward declaration of it. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-3-andriy.shevchenko@linux.intel.com --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 56642188ec21..d21f75d294d7 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -37,6 +37,7 @@ #include struct device_node; +struct fwnode_handle; struct irq_domain; struct irq_chip; struct irq_data; -- cgit v1.2.3 From b6e95788fde8c9bc9da729102085dd36a5a0cda6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Oct 2020 18:59:18 +0200 Subject: irqdomain: Introduce irq_domain_create_legacy() API Introduce irq_domain_create_legacy() API which is functional equivalent to the existing irq_domain_add_legacy(), but takes a pointer to the struct fwnode_handle as a parameter. This is useful for non OF systems. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20201030165919.86234-5-andriy.shevchenko@linux.intel.com --- include/linux/irqdomain.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d21f75d294d7..77bf7d84c673 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -271,6 +271,12 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, + void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern bool irq_domain_check_msi_remap(void); -- cgit v1.2.3 From 1d4ef9b39ebecca827642b8897d2d79ea2026682 Mon Sep 17 00:00:00 2001 From: Cristian Pop Date: Mon, 28 Sep 2020 12:09:55 +0300 Subject: iio: core: Add optional symbolic label to a device channel If a label is defined in the device tree for this channel add that to the channel specific attributes. This is useful for userspace to be able to identify an individual channel. Signed-off-by: Cristian Pop Link: https://lore.kernel.org/r/20200928090959.88842-1-cristian.pop@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 2e45b3ceafa7..9a3cf4815148 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -362,6 +362,8 @@ struct iio_trigger; /* forward declaration */ * and max. For lists, all possible values are enumerated. * @write_raw: function to write a value to the device. * Parameters are the same as for read_raw. + * @read_label: function to request label name for a specified label, + * for better channel identification. * @write_raw_get_fmt: callback function to query the expected * format/precision. If not set by the driver, write_raw * returns IIO_VAL_INT_PLUS_MICRO. @@ -420,6 +422,10 @@ struct iio_info { int val2, long mask); + int (*read_label)(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + char *label); + int (*write_raw_get_fmt)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, long mask); -- cgit v1.2.3 From 0e30f47232ab57c685258aa91adc3a3e67bd023e Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Mon, 5 Oct 2020 21:01:26 +0530 Subject: mtd: spi-nor: add support for DTR protocol Double Transfer Rate (DTR) is SPI protocol in which data is transferred on each clock edge as opposed to on each clock cycle. Make framework-level changes to allow supporting flashes in DTR mode. Right now, mixed DTR modes are not supported. So, for example a mode like 4S-4D-4D will not work. All phases need to be either DTR or STR. The xSPI spec says that "The program commands provide SPI backward compatible commands for programming data...". So 8D-8D-8D page program opcodes are populated with using 1S-1S-1S opcodes. Signed-off-by: Pratyush Yadav Signed-off-by: Vignesh Raghavendra Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20201005153138.6437-4-p.yadav@ti.com --- include/linux/mtd/spi-nor.h | 51 +++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 60bac2c0ec45..cd549042c53d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -182,6 +182,7 @@ enum spi_nor_protocol { SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2), SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4), SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8), + SNOR_PROTO_8_8_8_DTR = SNOR_PROTO_DTR(8, 8, 8), }; static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto) @@ -228,7 +229,7 @@ struct spi_nor_hwcaps { * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly * (Slow) Read. */ -#define SNOR_HWCAPS_READ_MASK GENMASK(14, 0) +#define SNOR_HWCAPS_READ_MASK GENMASK(15, 0) #define SNOR_HWCAPS_READ BIT(0) #define SNOR_HWCAPS_READ_FAST BIT(1) #define SNOR_HWCAPS_READ_1_1_1_DTR BIT(2) @@ -245,11 +246,12 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_READ_4_4_4 BIT(9) #define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10) -#define SNOR_HWCAPS_READ_OCTAL GENMASK(14, 11) +#define SNOR_HWCAPS_READ_OCTAL GENMASK(15, 11) #define SNOR_HWCAPS_READ_1_1_8 BIT(11) #define SNOR_HWCAPS_READ_1_8_8 BIT(12) #define SNOR_HWCAPS_READ_8_8_8 BIT(13) #define SNOR_HWCAPS_READ_1_8_8_DTR BIT(14) +#define SNOR_HWCAPS_READ_8_8_8_DTR BIT(15) /* * Page Program capabilities. @@ -260,18 +262,19 @@ struct spi_nor_hwcaps { * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory * implements such commands. */ -#define SNOR_HWCAPS_PP_MASK GENMASK(22, 16) -#define SNOR_HWCAPS_PP BIT(16) +#define SNOR_HWCAPS_PP_MASK GENMASK(23, 16) +#define SNOR_HWCAPS_PP BIT(16) -#define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) -#define SNOR_HWCAPS_PP_1_1_4 BIT(17) -#define SNOR_HWCAPS_PP_1_4_4 BIT(18) -#define SNOR_HWCAPS_PP_4_4_4 BIT(19) +#define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) +#define SNOR_HWCAPS_PP_1_1_4 BIT(17) +#define SNOR_HWCAPS_PP_1_4_4 BIT(18) +#define SNOR_HWCAPS_PP_4_4_4 BIT(19) -#define SNOR_HWCAPS_PP_OCTAL GENMASK(22, 20) -#define SNOR_HWCAPS_PP_1_1_8 BIT(20) -#define SNOR_HWCAPS_PP_1_8_8 BIT(21) -#define SNOR_HWCAPS_PP_8_8_8 BIT(22) +#define SNOR_HWCAPS_PP_OCTAL GENMASK(23, 20) +#define SNOR_HWCAPS_PP_1_1_8 BIT(20) +#define SNOR_HWCAPS_PP_1_8_8 BIT(21) +#define SNOR_HWCAPS_PP_8_8_8 BIT(22) +#define SNOR_HWCAPS_PP_8_8_8_DTR BIT(23) #define SNOR_HWCAPS_X_X_X (SNOR_HWCAPS_READ_2_2_2 | \ SNOR_HWCAPS_READ_4_4_4 | \ @@ -279,10 +282,14 @@ struct spi_nor_hwcaps { SNOR_HWCAPS_PP_4_4_4 | \ SNOR_HWCAPS_PP_8_8_8) +#define SNOR_HWCAPS_X_X_X_DTR (SNOR_HWCAPS_READ_8_8_8_DTR | \ + SNOR_HWCAPS_PP_8_8_8_DTR) + #define SNOR_HWCAPS_DTR (SNOR_HWCAPS_READ_1_1_1_DTR | \ SNOR_HWCAPS_READ_1_2_2_DTR | \ SNOR_HWCAPS_READ_1_4_4_DTR | \ - SNOR_HWCAPS_READ_1_8_8_DTR) + SNOR_HWCAPS_READ_1_8_8_DTR | \ + SNOR_HWCAPS_READ_8_8_8_DTR) #define SNOR_HWCAPS_ALL (SNOR_HWCAPS_READ_MASK | \ SNOR_HWCAPS_PP_MASK) @@ -318,6 +325,22 @@ struct spi_nor_controller_ops { int (*erase)(struct spi_nor *nor, loff_t offs); }; +/** + * enum spi_nor_cmd_ext - describes the command opcode extension in DTR mode + * @SPI_NOR_EXT_NONE: no extension. This is the default, and is used in Legacy + * SPI mode + * @SPI_NOR_EXT_REPEAT: the extension is same as the opcode + * @SPI_NOR_EXT_INVERT: the extension is the bitwise inverse of the opcode + * @SPI_NOR_EXT_HEX: the extension is any hex value. The command and opcode + * combine to form a 16-bit opcode. + */ +enum spi_nor_cmd_ext { + SPI_NOR_EXT_NONE = 0, + SPI_NOR_EXT_REPEAT, + SPI_NOR_EXT_INVERT, + SPI_NOR_EXT_HEX, +}; + /* * Forward declarations that are used internally by the core and manufacturer * drivers. @@ -345,6 +368,7 @@ struct spi_nor_flash_parameter; * @program_opcode: the program opcode * @sst_write_second: used by the SST write operation * @flags: flag options for the current SPI NOR (SNOR_F_*) + * @cmd_ext_type: the command opcode extension type for DTR mode. * @read_proto: the SPI protocol for read operations * @write_proto: the SPI protocol for write operations * @reg_proto: the SPI protocol for read_reg/write_reg/erase operations @@ -376,6 +400,7 @@ struct spi_nor { enum spi_nor_protocol reg_proto; bool sst_write_second; u32 flags; + enum spi_nor_cmd_ext cmd_ext_type; const struct spi_nor_controller_ops *controller_ops; -- cgit v1.2.3 From d73ee7534cc537b98b61bfd83dbce5bb12aecb80 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Mon, 5 Oct 2020 21:01:35 +0530 Subject: mtd: spi-nor: core: perform a Soft Reset on shutdown Perform a Soft Reset on shutdown on flashes that support it so that the flash can be reset to its initial state and any configurations made by spi-nor (given that they're only done in volatile registers) will be reset. This will hand back the flash in pristine state for any further operations on it. Signed-off-by: Pratyush Yadav Signed-off-by: Vignesh Raghavendra Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20201005153138.6437-13-p.yadav@ti.com --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index cd549042c53d..299685d15dc2 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -51,6 +51,8 @@ #define SPINOR_OP_CLFSR 0x50 /* Clear flag status register */ #define SPINOR_OP_RDEAR 0xc8 /* Read Extended Address Register */ #define SPINOR_OP_WREAR 0xc5 /* Write Extended Address Register */ +#define SPINOR_OP_SRSTEN 0x66 /* Software Reset Enable */ +#define SPINOR_OP_SRST 0x99 /* Software Reset */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ #define SPINOR_OP_READ_4B 0x13 /* Read data bytes (low frequency) */ -- cgit v1.2.3 From 43676605f890b218e551f396a55dbaea7799acb4 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 3 Nov 2020 10:30:10 +0100 Subject: drm/ttm: Add vmap/vunmap to TTM and TTM GEM helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new functions ttm_bo_{vmap,vunmap}() map and unmap a TTM BO in kernel address space. The mapping's address is returned as struct dma_buf_map. Each function is a simplified version of TTM's existing kmap code. Both functions respect the memory's location ani/or writecombine flags. On top TTM's functions, GEM TTM helpers got drm_gem_ttm_{vmap,vunmap}(), two helpers that convert a GEM object into the TTM BO and forward the call to TTM's vmap/vunmap. These helpers can be dropped into the rsp GEM object callbacks. v5: * use size_t for storing mapping size (Christian) * ignore premapped memory areas correctly in ttm_bo_vunmap() * rebase onto latest TTM interfaces (Christian) * remove BUG() from ttm_bo_vmap() (Christian) v4: * drop ttm_kmap_obj_to_dma_buf() in favor of vmap helpers (Daniel, Christian) Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Acked-by: Daniel Vetter Tested-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201103093015.1063-6-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index fd1aba545fdf..2e8bbecb5091 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -45,6 +45,12 @@ * * dma_buf_map_set_vaddr(&map. 0xdeadbeaf); * + * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem(). + * + * .. code-block:: c + * + * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); + * * Test if a mapping is valid with either dma_buf_map_is_set() or * dma_buf_map_is_null(). * @@ -118,6 +124,20 @@ static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) map->is_iomem = false; } +/** + * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory + * @map: The dma-buf mapping structure + * @vaddr_iomem: An I/O-memory address + * + * Sets the address and the I/O-memory flag. + */ +static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map, + void __iomem *vaddr_iomem) +{ + map->vaddr_iomem = vaddr_iomem; + map->is_iomem = true; +} + /** * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality * @lhs: The dma-buf mapping structure -- cgit v1.2.3 From b4e7090c242ec64bfa77a69ea7c3cceba21c7a65 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 3 Nov 2020 10:30:14 +0100 Subject: dma-buf-map: Add memcpy and pointer-increment interfaces To do framebuffer updates, one needs memcpy from system memory and a pointer-increment function. Add both interfaces with documentation. v5: * include to build on sparc64 (Sam) Signed-off-by: Thomas Zimmermann Reviewed-by: Sam Ravnborg Tested-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20201103093015.1063-10-tzimmermann@suse.de --- include/linux/dma-buf-map.h | 73 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 2e8bbecb5091..583a3a1f9447 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -7,6 +7,7 @@ #define __DMA_BUF_MAP_H__ #include +#include /** * DOC: overview @@ -32,6 +33,14 @@ * accessing the buffer. Use the returned instance and the helper functions * to access the buffer's memory in the correct way. * + * The type :c:type:`struct dma_buf_map ` and its helpers are + * actually independent from the dma-buf infrastructure. When sharing buffers + * among devices, drivers have to know the location of the memory to access + * the buffers in a safe way. :c:type:`struct dma_buf_map ` + * solves this problem for dma-buf and its users. If other drivers or + * sub-systems require similar functionality, the type could be generalized + * and moved to a more prominent header file. + * * Open-coding access to :c:type:`struct dma_buf_map ` is * considered bad style. Rather then accessing its fields directly, use one * of the provided helper functions, or implement your own. For example, @@ -51,6 +60,14 @@ * * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf); * + * Instances of struct dma_buf_map do not have to be cleaned up, but + * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings + * always refer to system memory. + * + * .. code-block:: c + * + * dma_buf_map_clear(&map); + * * Test if a mapping is valid with either dma_buf_map_is_set() or * dma_buf_map_is_null(). * @@ -73,17 +90,19 @@ * if (dma_buf_map_is_equal(&sys_map, &io_map)) * // always false * - * Instances of struct dma_buf_map do not have to be cleaned up, but - * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings - * always refer to system memory. + * A set up instance of struct dma_buf_map can be used to access or manipulate + * the buffer memory. Depending on the location of the memory, the provided + * helpers will pick the correct operations. Data can be copied into the memory + * with dma_buf_map_memcpy_to(). The address can be manipulated with + * dma_buf_map_incr(). * - * The type :c:type:`struct dma_buf_map ` and its helpers are - * actually independent from the dma-buf infrastructure. When sharing buffers - * among devices, drivers have to know the location of the memory to access - * the buffers in a safe way. :c:type:`struct dma_buf_map ` - * solves this problem for dma-buf and its users. If other drivers or - * sub-systems require similar functionality, the type could be generalized - * and moved to a more prominent header file. + * .. code-block:: c + * + * const void *src = ...; // source buffer + * size_t len = ...; // length of src + * + * dma_buf_map_memcpy_to(&map, src, len); + * dma_buf_map_incr(&map, len); // go to first byte after the memcpy */ /** @@ -210,4 +229,38 @@ static inline void dma_buf_map_clear(struct dma_buf_map *map) } } +/** + * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping + * @dst: The dma-buf mapping structure + * @src: The source buffer + * @len: The number of byte in src + * + * Copies data into a dma-buf mapping. The source buffer is in system + * memory. Depending on the buffer's location, the helper picks the correct + * method of accessing the memory. + */ +static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem, src, len); + else + memcpy(dst->vaddr, src, len); +} + +/** + * dma_buf_map_incr - Increments the address stored in a dma-buf mapping + * @map: The dma-buf mapping structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a dma-buf mapping. Depending on the + * buffer's location, the correct value will be updated. + */ +static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + #endif /* __DMA_BUF_MAP_H__ */ -- cgit v1.2.3 From 22447a99c97e353bde8f90c2353873f27681d57c Mon Sep 17 00:00:00 2001 From: Pawel Czarnecki Date: Tue, 13 Oct 2020 16:45:52 +0200 Subject: drivers/soc/litex: add LiteX SoC Controller driver This commit adds driver for the FPGA-based LiteX SoC Controller from LiteX SoC builder. Co-developed-by: Mateusz Holenko Signed-off-by: Mateusz Holenko Signed-off-by: Pawel Czarnecki Signed-off-by: Stafford Horne --- include/linux/litex.h | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 include/linux/litex.h (limited to 'include/linux') diff --git a/include/linux/litex.h b/include/linux/litex.h new file mode 100644 index 000000000000..40f5be503593 --- /dev/null +++ b/include/linux/litex.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common LiteX header providing + * helper functions for accessing CSRs. + * + * Implementation of the functions is provided by + * the LiteX SoC Controller driver. + * + * Copyright (C) 2019-2020 Antmicro + */ + +#ifndef _LINUX_LITEX_H +#define _LINUX_LITEX_H + +#include +#include +#include + +/* + * The parameters below are true for LiteX SoCs configured for 8-bit CSR Bus, + * 32-bit aligned. + * + * Supporting other configurations will require extending the logic in this + * header and in the LiteX SoC controller driver. + */ +#define LITEX_REG_SIZE 0x4 +#define LITEX_SUBREG_SIZE 0x1 +#define LITEX_SUBREG_SIZE_BIT (LITEX_SUBREG_SIZE * 8) + +#define WRITE_LITEX_SUBREGISTER(val, base_offset, subreg_id) \ + writel((u32 __force)cpu_to_le32(val), base_offset + (LITEX_REG_SIZE * subreg_id)) + +#define READ_LITEX_SUBREGISTER(base_offset, subreg_id) \ + le32_to_cpu((__le32 __force)readl(base_offset + (LITEX_REG_SIZE * subreg_id))) + +void litex_set_reg(void __iomem *reg, unsigned long reg_sz, unsigned long val); + +unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_sz); + +static inline void litex_write8(void __iomem *reg, u8 val) +{ + WRITE_LITEX_SUBREGISTER(val, reg, 0); +} + +static inline void litex_write16(void __iomem *reg, u16 val) +{ + WRITE_LITEX_SUBREGISTER(val >> 8, reg, 0); + WRITE_LITEX_SUBREGISTER(val, reg, 1); +} + +static inline void litex_write32(void __iomem *reg, u32 val) +{ + WRITE_LITEX_SUBREGISTER(val >> 24, reg, 0); + WRITE_LITEX_SUBREGISTER(val >> 16, reg, 1); + WRITE_LITEX_SUBREGISTER(val >> 8, reg, 2); + WRITE_LITEX_SUBREGISTER(val, reg, 3); +} + +static inline void litex_write64(void __iomem *reg, u64 val) +{ + WRITE_LITEX_SUBREGISTER(val >> 56, reg, 0); + WRITE_LITEX_SUBREGISTER(val >> 48, reg, 1); + WRITE_LITEX_SUBREGISTER(val >> 40, reg, 2); + WRITE_LITEX_SUBREGISTER(val >> 32, reg, 3); + WRITE_LITEX_SUBREGISTER(val >> 24, reg, 4); + WRITE_LITEX_SUBREGISTER(val >> 16, reg, 5); + WRITE_LITEX_SUBREGISTER(val >> 8, reg, 6); + WRITE_LITEX_SUBREGISTER(val, reg, 7); +} + +static inline u8 litex_read8(void __iomem *reg) +{ + return READ_LITEX_SUBREGISTER(reg, 0); +} + +static inline u16 litex_read16(void __iomem *reg) +{ + return (READ_LITEX_SUBREGISTER(reg, 0) << 8) + | (READ_LITEX_SUBREGISTER(reg, 1)); +} + +static inline u32 litex_read32(void __iomem *reg) +{ + return (READ_LITEX_SUBREGISTER(reg, 0) << 24) + | (READ_LITEX_SUBREGISTER(reg, 1) << 16) + | (READ_LITEX_SUBREGISTER(reg, 2) << 8) + | (READ_LITEX_SUBREGISTER(reg, 3)); +} + +static inline u64 litex_read64(void __iomem *reg) +{ + return ((u64)READ_LITEX_SUBREGISTER(reg, 0) << 56) + | ((u64)READ_LITEX_SUBREGISTER(reg, 1) << 48) + | ((u64)READ_LITEX_SUBREGISTER(reg, 2) << 40) + | ((u64)READ_LITEX_SUBREGISTER(reg, 3) << 32) + | ((u64)READ_LITEX_SUBREGISTER(reg, 4) << 24) + | ((u64)READ_LITEX_SUBREGISTER(reg, 5) << 16) + | ((u64)READ_LITEX_SUBREGISTER(reg, 6) << 8) + | ((u64)READ_LITEX_SUBREGISTER(reg, 7)); +} + +#endif /* _LINUX_LITEX_H */ -- cgit v1.2.3 From 267fb27352b6fc9fdbad753127a239f75618ecbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 15:50:32 +0100 Subject: perf: Reduce stack usage of perf_output_begin() __perf_output_begin() has an on-stack struct perf_sample_data in the unlikely case it needs to generate a LOST record. However, every call to perf_output_begin() must already have a perf_sample_data on-stack. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151954.985416146@infradead.org --- include/linux/perf_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..b775ae0a8c87 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1400,11 +1400,14 @@ perf_event_addr_filters(struct perf_event *event) extern void perf_event_addr_filters_sync(struct perf_event *event); extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, - unsigned int size); + struct perf_sample_data *data, + struct perf_event *event, + unsigned int size); extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); -- cgit v1.2.3 From 76a4efa80900fc40e0fdf243b42aec9fb8c35d24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:14:21 +0100 Subject: perf/arch: Remove perf_sample_data::regs_user_copy struct perf_sample_data lives on-stack, we should be careful about it's size. Furthermore, the pt_regs copy in there is only because x86_64 is a trainwreck, solve it differently. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steven Rostedt Link: https://lkml.kernel.org/r/20201030151955.258178461@infradead.org --- include/linux/perf_event.h | 6 ------ include/linux/perf_regs.h | 6 ++---- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b775ae0a8c87..96450f6fb1de 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1022,13 +1022,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; u64 aux_size; - /* - * regs_user may point to task_pt_regs or to regs_user_copy, depending - * on arch details. - */ struct perf_regs regs_user; - struct pt_regs regs_user_copy; - struct perf_regs regs_intr; u64 stack_user_size; diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 2d12e97d5e7b..f632c5725f16 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -20,8 +20,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy); + struct pt_regs *regs); #else #define PERF_REG_EXTENDED_MASK 0 @@ -42,8 +41,7 @@ static inline u64 perf_reg_abi(struct task_struct *task) } static inline void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); -- cgit v1.2.3 From 21774fd81a51ec1eccd59caf922d387977acd2aa Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 15 Oct 2020 14:57:26 -0400 Subject: kernfs: bring names in comments in line with code Fix two stragglers in the comments of the below rename operation. Fixes: adc5e8b58f48 ("kernfs: drop s_ prefix from kernfs_node members") Acked-by: Tejun Heo Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201015185726.1386868-1-willemdebruijn.kernel@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 89f6a4214a70..9e8ca8743c26 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -116,7 +116,7 @@ struct kernfs_elem_attr { * kernfs node is represented by single kernfs_node. Most fields are * private to kernfs and shouldn't be accessed directly by kernfs users. * - * As long as s_count reference is held, the kernfs_node itself is + * As long as count reference is held, the kernfs_node itself is * accessible. Dereferencing elem or any other outer entity requires * active reference. */ -- cgit v1.2.3 From 9f38abefd37af8726d59706b9b84530630b6b620 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:17 +0200 Subject: uio: fix some kernel-doc markups The definitions for (devm_)uio_register_device should be at the header file, as the macros are there. The ones inside uio.c refer, instead, to __(devm_)uio_register_device. Update them and add new kernel-doc markups for the macros. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/82ab7b68d271aeda7396e369ff8a629491b9d628.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 54bf6b118401..47c5962b876b 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -117,6 +117,14 @@ extern int __must_check struct uio_info *info); /* use a define to avoid include chaining to get THIS_MODULE */ + +/** + * uio_register_device - register a new userspace IO device + * @parent: parent device + * @info: UIO device capabilities + * + * returns zero on success or a negative error code. + */ #define uio_register_device(parent, info) \ __uio_register_device(THIS_MODULE, parent, info) @@ -129,6 +137,14 @@ extern int __must_check struct uio_info *info); /* use a define to avoid include chaining to get THIS_MODULE */ + +/** + * devm_uio_register_device - Resource managed uio_register_device() + * @parent: parent device + * @info: UIO device capabilities + * + * returns zero on success or a negative error code. + */ #define devm_uio_register_device(parent, info) \ __devm_uio_register_device(THIS_MODULE, parent, info) -- cgit v1.2.3 From 33c0c9bdf7a59051a654cd98b7d2b48ce0080967 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:32:56 +0200 Subject: drivers: base: fix some kernel-doc markups class_create is actually defined at the header. Fix the markup there and add a new one at the right place. While here, also fix some markups for functions that have different names between their prototypes and kernel-doc comments. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/2fb6efd6a1f90d69ff73bf579566079cbb051e15.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/class.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device/class.h b/include/linux/device/class.h index e8d470c457d1..e61ec5502019 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -256,6 +256,20 @@ extern void class_destroy(struct class *cls); /* This is a #define to keep the compiler from merging different * instances of the __key variable */ + +/** + * class_create - create a struct class structure + * @owner: pointer to the module that is to "own" this struct class + * @name: pointer to a string for the name of this class. + * + * This is used to create a struct class pointer that can then be used + * in calls to device_create(). + * + * Returns &struct class pointer on success, or ERR_PTR() on error. + * + * Note, the pointer created here is to be destroyed when finished by + * making a call to class_destroy(). + */ #define class_create(owner, name) \ ({ \ static struct lock_class_key __key; \ -- cgit v1.2.3 From a18394269fc87276963e8d965c730900178d7e4b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 7 Nov 2020 21:49:07 +0100 Subject: net: core: add dev_get_tstats64 as a ndo_get_stats64 implementation It's a frequent pattern to use netdev->stats for the less frequently accessed counters and per-cpu counters for the frequently accessed counters (rx/tx bytes/packets). Add a default ndo_get_stats64() implementation for this use case. Reviewed-by: Florian Fainelli Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a53ed2d1ed1d..7ce648a564f7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4527,6 +4527,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats); void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); +void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; -- cgit v1.2.3 From 9a2a9ebc0a758d887ee06e067e9f7f0b36ff7574 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:25:57 +0100 Subject: cpufreq: Introduce governor flags A new cpufreq governor flag will be added subsequently, so replace the bool dynamic_switching fleid in struct cpufreq_governor with a flags field and introduce CPUFREQ_GOV_DYNAMIC_SWITCHING to set for the "dynamic switching" governors instead of it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9bdfcf3c4748 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -570,12 +570,17 @@ struct cpufreq_governor { char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); - /* For governors which change frequency dynamically by themselves */ - bool dynamic_switching; struct list_head governor_list; struct module *owner; + u8 flags; }; +/* Governor flags */ + +/* For governors which change frequency dynamically by themselves */ +#define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) + + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); -- cgit v1.2.3 From 218f66870181bec7aaa6e3c72f346039c590c3c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:10 +0100 Subject: cpufreq: Introduce CPUFREQ_GOV_STRICT_TARGET Introduce a new governor flag, CPUFREQ_GOV_STRICT_TARGET, for the governors that want the target frequency to be set exactly to the given value without leaving any room for adjustments on the hardware side and set this flag for the powersave and performance governors. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9bdfcf3c4748..6eb9a3b8ec7b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -580,6 +580,9 @@ struct cpufreq_governor { /* For governors which change frequency dynamically by themselves */ #define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) +/* For governors wanting the target frequency to be set exactly */ +#define CPUFREQ_GOV_STRICT_TARGET BIT(1) + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, -- cgit v1.2.3 From ea9364bbadf11f0c55802cf11387d74f524cee84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:37 +0100 Subject: cpufreq: Add strict_target to struct cpufreq_policy Add a new field to be set when the CPUFREQ_GOV_STRICT_TARGET flag is set for the current governor to struct cpufreq_policy, so that the drivers needing to check CPUFREQ_GOV_STRICT_TARGET do not have to access the governor object during every frequency transition. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6eb9a3b8ec7b..acbad3b36322 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -109,6 +109,12 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Set if the CPUFREQ_GOV_STRICT_TARGET flag is set for the current + * governor. + */ + bool strict_target; + /* * Preferred average time interval between consecutive invocations of * the driver to set the frequency for this policy. To be set by the -- cgit v1.2.3 From a8b62fd0850503cf1e557d7e5a98d3f1f5c25eef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 21 Sep 2020 12:58:17 +0200 Subject: stop_machine: Add function and caller debug info Crashes in stop-machine are hard to connect to the calling code, add a little something to help with that. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.116513635@infradead.org --- include/linux/stop_machine.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 76d8b09384a7..30577c3aecf8 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg); struct cpu_stop_work { struct list_head list; /* cpu_stopper->works */ cpu_stop_fn_t fn; + unsigned long caller; void *arg; struct cpu_stop_done *done; }; @@ -36,6 +37,8 @@ void stop_machine_park(int cpu); void stop_machine_unpark(int cpu); void stop_machine_yield(const struct cpumask *cpumask); +extern void print_stop_info(const char *log_lvl, struct task_struct *task); + #else /* CONFIG_SMP */ #include @@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu, return false; } +static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { } + #endif /* CONFIG_SMP */ /* -- cgit v1.2.3 From 1cf12e08bc4d50a76b80c42a3109c53d8794a0c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2020 09:27:18 +0200 Subject: sched/hotplug: Consolidate task migration on CPU unplug With the new mechanism which kicks tasks off the outgoing CPU at the end of schedule() the situation on an outgoing CPU right before the stopper thread brings it down completely is: - All user tasks and all unbound kernel threads have either been migrated away or are not running and the next wakeup will move them to a online CPU. - All per CPU kernel threads, except cpu hotplug thread and the stopper thread have either been unbound or parked by the responsible CPU hotplug callback. That means that at the last step before the stopper thread is invoked the cpu hotplug thread is the last legitimate running task on the outgoing CPU. Add a final wait step right before the stopper thread is kicked which ensures that any still running tasks on the way to park or on the way to kick themself of the CPU are either sleeping or gone. This allows to remove the migrate_tasks() crutch in sched_cpu_dying(). If sched_cpu_dying() detects that there is still another running task aside of the stopper thread then it will explode with the appropriate fireworks. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.547163969@infradead.org --- include/linux/cpuhotplug.h | 1 + include/linux/sched/hotplug.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bc56287a1ed1..0042ef362511 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -152,6 +152,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 9a62ffdd296f..412cdaba33eb 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu); extern int sched_cpu_deactivate(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_wait_empty(unsigned int cpu); extern int sched_cpu_dying(unsigned int cpu); #else +# define sched_cpu_wait_empty NULL # define sched_cpu_dying NULL #endif -- cgit v1.2.3 From af449901b84c98cbd84a0113223ba3bcfcb12a26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Sep 2020 10:38:30 +0200 Subject: sched: Add migrate_disable() Add the base migrate_disable() support (under protest). While migrate_disable() is (currently) required for PREEMPT_RT, it is also one of the biggest flaws in the system. Notably this is just the base implementation, it is broken vs sched_setaffinity() and hotplug, both solved in additional patches for ease of review. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.818170844@infradead.org --- include/linux/preempt.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 3 +++ 2 files changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7d9c1c0e149c..97ba7c920653 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -322,6 +322,69 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #endif +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + +/* + * Migrate-Disable and why it is (strongly) undesired. + * + * The premise of the Real-Time schedulers we have on Linux + * (SCHED_FIFO/SCHED_DEADLINE) is that M CPUs can/will run M tasks + * concurrently, provided there are sufficient runnable tasks, also known as + * work-conserving. For instance SCHED_DEADLINE tries to schedule the M + * earliest deadline threads, and SCHED_FIFO the M highest priority threads. + * + * The correctness of various scheduling models depends on this, but is it + * broken by migrate_disable() that doesn't imply preempt_disable(). Where + * preempt_disable() implies an immediate priority ceiling, preemptible + * migrate_disable() allows nesting. + * + * The worst case is that all tasks preempt one another in a migrate_disable() + * region and stack on a single CPU. This then reduces the available bandwidth + * to a single CPU. And since Real-Time schedulability theory considers the + * Worst-Case only, all Real-Time analysis shall revert to single-CPU + * (instantly solving the SMP analysis problem). + * + * + * The reason we have it anyway. + * + * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a + * number of primitives into becoming preemptible, they would also allow + * migration. This turns out to break a bunch of per-cpu usage. To this end, + * all these primitives employ migirate_disable() to restore this implicit + * assumption. + * + * This is a 'temporary' work-around at best. The correct solution is getting + * rid of the above assumptions and reworking the code to employ explicit + * per-cpu locking or short preempt-disable regions. + * + * The end goal must be to get rid of migrate_disable(), alternatively we need + * a schedulability theory that does not depend on abritrary migration. + * + * + * Notes on the implementation. + * + * The implementation is particularly tricky since existing code patterns + * dictate neither migrate_disable() nor migrate_enable() is allowed to block. + * This means that it cannot use cpus_read_lock() to serialize against hotplug, + * nor can it easily migrate itself into a pending affinity mask change on + * migrate_enable(). + * + * + * Note: even non-work-conserving schedulers like semi-partitioned depends on + * migration, so migrate_disable() is not only a problem for + * work-conserving schedulers. + * + */ +extern void migrate_disable(void); +extern void migrate_enable(void); + +#elif defined(CONFIG_PREEMPT_RT) + +static inline void migrate_disable(void) { } +static inline void migrate_enable(void) { } + +#else /* !CONFIG_PREEMPT_RT */ + /** * migrate_disable - Prevent migration of the current task * @@ -352,4 +415,6 @@ static __always_inline void migrate_enable(void) preempt_enable(); } +#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */ + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 063cd120b459..0732356c0eca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -714,6 +714,9 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + int migration_disabled; +#endif #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -- cgit v1.2.3 From 6d337eab041d56bb8f0e7794f39906c21054c512 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Sep 2020 17:24:31 +0200 Subject: sched: Fix migrate_disable() vs set_cpus_allowed_ptr() Concurrent migrate_disable() and set_cpus_allowed_ptr() has interesting features. We rely on set_cpus_allowed_ptr() to not return until the task runs inside the provided mask. This expectation is exported to userspace. This means that any set_cpus_allowed_ptr() caller must wait until migrate_enable() allows migrations. At the same time, we don't want migrate_enable() to schedule, due to patterns like: preempt_disable(); migrate_disable(); ... migrate_enable(); preempt_enable(); And: raw_spin_lock(&B); spin_unlock(&A); this means that when migrate_enable() must restore the affinity mask, it cannot wait for completion thereof. Luck will have it that that is exactly the case where there is a pending set_cpus_allowed_ptr(), so let that provide storage for the async stop machine. Much thanks to Valentin who used TLA+ most effective and found lots of 'interesting' cases. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102346.921768277@infradead.org --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0732356c0eca..90a0c92741d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -714,6 +714,7 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; + void *migration_pending; #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) int migration_disabled; #endif -- cgit v1.2.3 From 14e292f8d45380c519a83d9b0f37089a17eedcdf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Oct 2020 15:54:14 +0200 Subject: sched,rt: Use cpumask_any*_distribute() Replace a bunch of cpumask_any*() instances with cpumask_any*_distribute(), by injecting this little bit of random in cpu selection, we reduce the chance two competing balance operations working off the same lowest_mask pick the same CPU. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102347.190759694@infradead.org --- include/linux/cpumask.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f0d895d6ac39..383684e30f12 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p, return cpumask_next_and(-1, src1p, src2p); } +static inline int cpumask_any_distribute(const struct cpumask *srcp) +{ + return cpumask_first(srcp); +} + #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ @@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); +int cpumask_any_distribute(const struct cpumask *srcp); /** * for_each_cpu - iterate over every cpu in a mask -- cgit v1.2.3 From a7c81556ec4d341dfdbf2cc478ead89d73e474a7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2020 17:06:07 +0200 Subject: sched: Fix migrate_disable() vs rt/dl balancing In order to minimize the interference of migrate_disable() on lower priority tasks, which can be deprived of runtime due to being stuck below a higher priority task. Teach the RT/DL balancers to push away these higher priority tasks when a lower priority task gets selected to run on a freshly demoted CPU (pull). This adds migration interference to the higher priority task, but restores bandwidth to system that would otherwise be irrevocably lost. Without this it would be possible to have all tasks on the system stuck on a single CPU, each task preempted in a migrate_disable() section with a single high priority task running. This way we can still approximate running the M highest priority tasks on the system. Migrating the top task away is (ofcourse) still subject to migrate_disable() too, which means the lower task is subject to an interference equivalent to the worst case migrate_disable() section. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201023102347.499155098@infradead.org --- include/linux/preempt.h | 40 ++++++++++++++++++++++------------------ include/linux/sched.h | 3 ++- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 97ba7c920653..8b43922e65df 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -325,24 +325,28 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) /* - * Migrate-Disable and why it is (strongly) undesired. - * - * The premise of the Real-Time schedulers we have on Linux - * (SCHED_FIFO/SCHED_DEADLINE) is that M CPUs can/will run M tasks - * concurrently, provided there are sufficient runnable tasks, also known as - * work-conserving. For instance SCHED_DEADLINE tries to schedule the M - * earliest deadline threads, and SCHED_FIFO the M highest priority threads. - * - * The correctness of various scheduling models depends on this, but is it - * broken by migrate_disable() that doesn't imply preempt_disable(). Where - * preempt_disable() implies an immediate priority ceiling, preemptible - * migrate_disable() allows nesting. - * - * The worst case is that all tasks preempt one another in a migrate_disable() - * region and stack on a single CPU. This then reduces the available bandwidth - * to a single CPU. And since Real-Time schedulability theory considers the - * Worst-Case only, all Real-Time analysis shall revert to single-CPU - * (instantly solving the SMP analysis problem). + * Migrate-Disable and why it is undesired. + * + * When a preempted task becomes elegible to run under the ideal model (IOW it + * becomes one of the M highest priority tasks), it might still have to wait + * for the preemptee's migrate_disable() section to complete. Thereby suffering + * a reduction in bandwidth in the exact duration of the migrate_disable() + * section. + * + * Per this argument, the change from preempt_disable() to migrate_disable() + * gets us: + * + * - a higher priority tasks gains reduced wake-up latency; with preempt_disable() + * it would have had to wait for the lower priority task. + * + * - a lower priority tasks; which under preempt_disable() could've instantly + * migrated away when another CPU becomes available, is now constrained + * by the ability to push the higher priority task away, which might itself be + * in a migrate_disable() section, reducing it's available bandwidth. + * + * IOW it trades latency / moves the interference term, but it stays in the + * system, and as long as it remains unbounded, the system is not fully + * deterministic. * * * The reason we have it anyway. diff --git a/include/linux/sched.h b/include/linux/sched.h index 90a0c92741d7..3af9d52fe093 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -716,8 +716,9 @@ struct task_struct { cpumask_t cpus_mask; void *migration_pending; #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) - int migration_disabled; + unsigned short migration_disabled; #endif + unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -- cgit v1.2.3 From c250d50fe2ce627ca9805d9c8ac11cbbf922a4a6 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 5 Nov 2020 12:50:01 +0000 Subject: PM: EM: Add a flag indicating units of power values in Energy Model There are different platforms and devices which might use different scale for the power values. Kernel sub-systems might need to check if all Energy Model (EM) devices are using the same scale. Address that issue and store the information inside EM for each device. Thanks to that they can be easily compared and proper action triggered. Suggested-by: Daniel Lezcano Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index b67a51c574b9..3a33c738d876 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -29,6 +29,8 @@ struct em_perf_state { * em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states + * @milliwatts: Flag indicating the power values are in milli-Watts + * or some other scale. * @cpus: Cpumask covering the CPUs of the domain. It's here * for performance reasons to avoid potential cache * misses during energy calculations in the scheduler @@ -43,6 +45,7 @@ struct em_perf_state { struct em_perf_domain { struct em_perf_state *table; int nr_perf_states; + int milliwatts; unsigned long cpus[]; }; @@ -79,7 +82,8 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span); + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -186,7 +190,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span) + struct em_data_callback *cb, cpumask_t *span, + bool milliwatts) { return -EINVAL; } -- cgit v1.2.3 From f2c90b12e700fff6a0b5a1c32f446f05f9d0890c Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 3 Nov 2020 09:05:59 +0000 Subject: PM: EM: update the comments related to power scale The Energy Model supports power values expressed in milli-Watts or in an 'abstract scale'. Update the related comments is the code to reflect that state. Reviewed-by: Quentin Perret Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 3a33c738d876..9618c0a46ef4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -13,9 +13,8 @@ /** * em_perf_state - Performance state of a performance domain * @frequency: The frequency in KHz, for consistency with CPUFreq - * @power: The power consumed at this level, in milli-watts (by 1 CPU or - by a registered device). It can be a total power: static and - dynamic. + * @power: The power consumed at this level (by 1 CPU or by a registered + * device). It can be a total power: static and dynamic. * @cost: The cost coefficient associated with this level, used during * energy calculation. Equal to: power * max_frequency / frequency */ @@ -58,7 +57,7 @@ struct em_data_callback { /** * active_power() - Provide power at the next performance state of * a device - * @power : Active power at the performance state in mW + * @power : Active power at the performance state * (modified) * @freq : Frequency at the performance state in kHz * (modified) @@ -69,8 +68,8 @@ struct em_data_callback { * and frequency. * * In case of CPUs, the power is the one of a single CPU in the domain, - * expressed in milli-watts. It is expected to fit in the - * [0, EM_MAX_POWER] range. + * expressed in milli-Watts or an abstract scale. It is expected to + * fit in the [0, EM_MAX_POWER] range. * * Return 0 on success. */ -- cgit v1.2.3 From fc51989062138744b56e47190915ce68484e73f3 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 3 Nov 2020 16:06:25 +0100 Subject: PM: domains: Rename pm_genpd_syscore_poweroff|poweron() To better describe what the pm_genpd_syscore_poweroff|poweron() functions actually do, let's rename them to dev_pm_genpd_suspend|resume() and update the rather few callers of them accordingly (a couple of clocksource drivers). Moreover, let's take the opportunity to add some documentation of these exported functions, as that is currently missing. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ad0ec481416..a8f93328daec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -280,11 +280,11 @@ static inline int dev_pm_genpd_remove_notifier(struct device *dev) #endif #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP -void pm_genpd_syscore_poweroff(struct device *dev); -void pm_genpd_syscore_poweron(struct device *dev); +void dev_pm_genpd_suspend(struct device *dev); +void dev_pm_genpd_resume(struct device *dev); #else -static inline void pm_genpd_syscore_poweroff(struct device *dev) {} -static inline void pm_genpd_syscore_poweron(struct device *dev) {} +static inline void dev_pm_genpd_suspend(struct device *dev) {} +static inline void dev_pm_genpd_resume(struct device *dev) {} #endif /* OF PM domain providers */ -- cgit v1.2.3 From 36e68442d1afd4f720704ee1ea8486331507e834 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Nov 2020 17:19:31 -0800 Subject: bpf: Load and verify kernel module BTFs Add kernel module listener that will load/validate and unload module BTF. Module BTFs gets ID generated for them, which makes it possible to iterate them with existing BTF iteration API. They are given their respective module's names, which will get reported through GET_OBJ_INFO API. They are also marked as in-kernel BTFs for tooling to distinguish them from user-provided BTFs. Also, similarly to vmlinux BTF, kernel module BTFs are exposed through sysfs as /sys/kernel/btf/. This is convenient for user-space tools to inspect module BTF contents and dump their types with existing tools: [vmuser@archvm bpf]$ ls -la /sys/kernel/btf total 0 drwxr-xr-x 2 root root 0 Nov 4 19:46 . drwxr-xr-x 13 root root 0 Nov 4 19:46 .. ... -r--r--r-- 1 root root 888 Nov 4 19:46 irqbypass -r--r--r-- 1 root root 100225 Nov 4 19:46 kvm -r--r--r-- 1 root root 35401 Nov 4 19:46 kvm_intel -r--r--r-- 1 root root 120 Nov 4 19:46 pcspkr -r--r--r-- 1 root root 399 Nov 4 19:46 serio_raw -r--r--r-- 1 root root 4094095 Nov 4 19:46 vmlinux Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/bpf/20201110011932.3201430-5-andrii@kernel.org --- include/linux/bpf.h | 2 ++ include/linux/module.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 73d5381a5d5c..581b2a2e78eb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,9 +36,11 @@ struct seq_operations; struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; +struct kobject; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; +extern struct kobject *btf_kobj; typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, struct bpf_iter_aux_info *aux); diff --git a/include/linux/module.h b/include/linux/module.h index a29187f7c360..20fce258ffba 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -475,6 +475,10 @@ struct module { unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; #endif +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + unsigned int btf_data_size; + void *btf_data; +#endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; -- cgit v1.2.3 From 8a3c84b649b033024d2349f96234b26cbd6083a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: separate __sb_start_write into blocking and non-blocking helpers Break this function into two helpers so that it's obvious that the trylock versions return a value that must be checked, and the blocking versions don't require that. While we're at it, clean up the return type mismatch. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- include/linux/fs.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..305989afd49c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1581,7 +1581,8 @@ extern struct timespec64 current_time(struct inode *inode); */ void __sb_end_write(struct super_block *sb, int level); -int __sb_start_write(struct super_block *sb, int level, bool wait); +void __sb_start_write(struct super_block *sb, int level); +bool __sb_start_write_trylock(struct super_block *sb, int level); #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) @@ -1645,12 +1646,12 @@ static inline void sb_end_intwrite(struct super_block *sb) */ static inline void sb_start_write(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_WRITE, true); + __sb_start_write(sb, SB_FREEZE_WRITE); } -static inline int sb_start_write_trylock(struct super_block *sb) +static inline bool sb_start_write_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_WRITE, false); + return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** @@ -1674,7 +1675,7 @@ static inline int sb_start_write_trylock(struct super_block *sb) */ static inline void sb_start_pagefault(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); + __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /* @@ -1692,12 +1693,12 @@ static inline void sb_start_pagefault(struct super_block *sb) */ static inline void sb_start_intwrite(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_FS, true); + __sb_start_write(sb, SB_FREEZE_FS); } -static inline int sb_start_intwrite_trylock(struct super_block *sb) +static inline bool sb_start_intwrite_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_FS, false); + return __sb_start_write_trylock(sb, SB_FREEZE_FS); } @@ -2756,14 +2757,14 @@ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; - return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); + return sb_start_write_trylock(file_inode(file)->i_sb); } static inline void file_end_write(struct file *file) -- cgit v1.2.3 From 9b8523423b23ee3dfd88e32f5b7207be56a4e782 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: move __sb_{start,end}_write* to fs.h Now that we've straightened out the callers, move these three functions to fs.h since they're fairly trivial. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- include/linux/fs.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 305989afd49c..6dabd019cab0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,9 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode); * Snapshotting support. */ -void __sb_end_write(struct super_block *sb, int level); -void __sb_start_write(struct super_block *sb, int level); -bool __sb_start_write_trylock(struct super_block *sb, int level); +/* + * These are internal functions, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +static inline void __sb_end_write(struct super_block *sb, int level) +{ + percpu_up_read(sb->s_writers.rw_sem + level-1); +} + +static inline void __sb_start_write(struct super_block *sb, int level) +{ + percpu_down_read(sb->s_writers.rw_sem + level - 1); +} + +static inline bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) -- cgit v1.2.3 From 60602cb549f1965a7edbc96026760dfb93911fab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 08:19:24 -0400 Subject: fgraph: Make overruns 4 bytes in graph stack structure Inspecting the data structures of the function graph tracer, I found that the overrun value is unsigned long, which is 8 bytes on a 64 bit machine, and not only that, the depth is an int (4 bytes). The overrun can be simply an unsigned int (4 bytes) and pack the ftrace_graph_ret structure better. The depth is moved up next to the func, as it is used more often with func, and improves cache locality. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 806196345c3f..8dde9c17aaa5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -864,11 +864,11 @@ struct ftrace_graph_ent { */ struct ftrace_graph_ret { unsigned long func; /* Current function */ + int depth; /* Number of functions that overran the depth limit for current task */ - unsigned long overrun; + unsigned int overrun; unsigned long long calltime; unsigned long long rettime; - int depth; } __packed; /* Type of the callback handlers for tracing function graph*/ -- cgit v1.2.3 From 7b68621f8d16689cbb4203aceaca86ffb165f1d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 30 Oct 2020 17:21:00 -0400 Subject: ftrace: Clean up the recursion code a bit In trace_test_and_set_recursion(), current->trace_recursion is placed into a variable, and that variable should be used for the processing, as there's no reason to dereference current multiple times. On trace_clear_recursion(), current->trace_recursion is modified and there's no reason to copy it over to a variable. Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 228cc56ed66e..a9f9c5714e65 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -162,7 +162,7 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, int start, int max) { - unsigned int val = current->trace_recursion; + unsigned int val = READ_ONCE(current->trace_recursion); int bit; /* A previous recursion check was made */ @@ -176,18 +176,15 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign * a switch between contexts. Allow for a single recursion. */ bit = TRACE_TRANSITION_BIT; - if (trace_recursion_test(bit)) { + if (val & (1 << bit)) { do_ftrace_record_recursion(ip, pip); return -1; } - trace_recursion_set(bit); - barrier(); - return bit + 1; + } else { + /* Normal check passed, clear the transition to allow it again */ + val &= ~(1 << TRACE_TRANSITION_BIT); } - /* Normal check passed, clear the transition to allow it again */ - trace_recursion_clear(TRACE_TRANSITION_BIT); - val |= 1 << bit; current->trace_recursion = val; barrier(); @@ -197,17 +194,12 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign static __always_inline void trace_clear_recursion(int bit) { - unsigned int val = current->trace_recursion; - if (!bit) return; - bit--; - bit = 1 << bit; - val &= ~bit; - barrier(); - current->trace_recursion = val; + bit--; + trace_recursion_clear(bit); } /** -- cgit v1.2.3 From 4210d50f0b3e423e10a7a254b2a67f5c5318868e Mon Sep 17 00:00:00 2001 From: Isaac Hazan Date: Thu, 24 Sep 2020 11:43:58 +0300 Subject: thunderbolt: Add link_speed and link_width to XDomain Link speed and link width are needed for checking expected values in case of using a loopback service. Signed-off-by: Isaac Hazan Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 5db2b11ab085..e441af88ed77 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -179,6 +179,8 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * @lock: Lock to serialize access to the following fields of this structure * @vendor_name: Name of the vendor (or %NULL if not known) * @device_name: Name of the device (or %NULL if not known) + * @link_speed: Speed of the link in Gb/s + * @link_width: Width of the link (1 or 2) * @is_unplugged: The XDomain is unplugged * @resume: The XDomain is being resumed * @needs_uuid: If the XDomain does not have @remote_uuid it will be @@ -223,6 +225,8 @@ struct tb_xdomain { struct mutex lock; const char *vendor_name; const char *device_name; + unsigned int link_speed; + unsigned int link_width; bool is_unplugged; bool resume; bool needs_uuid; -- cgit v1.2.3 From 5cc0df9ce10a860aaeac53f8df1cc8754c5c7b03 Mon Sep 17 00:00:00 2001 From: Isaac Hazan Date: Thu, 24 Sep 2020 11:44:01 +0300 Subject: thunderbolt: Add functions for enabling and disabling lane bonding on XDomain These can be used by service drivers to enable and disable lane bonding as needed. Signed-off-by: Isaac Hazan Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index e441af88ed77..0a747f92847e 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -247,6 +247,8 @@ struct tb_xdomain { u8 depth; }; +int tb_xdomain_lane_bonding_enable(struct tb_xdomain *xd); +void tb_xdomain_lane_bonding_disable(struct tb_xdomain *xd); int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, u16 transmit_ring, u16 receive_path, u16 receive_ring); -- cgit v1.2.3 From 407ac931aefda91ac90498c6b6e6893982173613 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 7 Oct 2020 17:53:44 +0300 Subject: thunderbolt: Create debugfs directory automatically for services This allows service drivers to use it as parent directory if they need to add their own debugfs entries. Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 0a747f92847e..a844fd5d96ab 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -350,6 +350,9 @@ void tb_unregister_protocol_handler(struct tb_protocol_handler *handler); * @prtcvers: Protocol version from the properties directory * @prtcrevs: Protocol software revision from the properties directory * @prtcstns: Protocol settings mask from the properties directory + * @debugfs_dir: Pointer to the service debugfs directory. Always created + * when debugfs is enabled. Can be used by service drivers to + * add their own entries under the service. * * Each domain exposes set of services it supports as collection of * properties. For each service there will be one corresponding @@ -363,6 +366,7 @@ struct tb_service { u32 prtcvers; u32 prtcrevs; u32 prtcstns; + struct dentry *debugfs_dir; }; static inline struct tb_service *tb_service_get(struct tb_service *svc) -- cgit v1.2.3 From afe704a2d0618ebdb559b5ddb059f6cdbfc78783 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 19 Oct 2020 19:15:20 +0300 Subject: thunderbolt: Add support for end-to-end flow control USB4 spec defines end-to-end (E2E) flow control that can be used between hosts to prevent overflow of a RX ring. We previously had this partially implemented but that code was removed with commit 53f13319d131 ("thunderbolt: Get rid of E2E workaround") with the idea that we add it back properly if there ever is need. Now that we are going to add DMA traffic test driver (in subsequent patches) this can be useful. For this reason we modify tb_ring_alloc_rx/tx() so that they accept RING_FLAG_E2E and configure the hardware ring accordingly. The RX side also requires passing TX HopID (e2e_tx_hop) used in the credit grant packets. Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Reviewed-by: Greg Kroah-Hartman --- include/linux/thunderbolt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index a844fd5d96ab..034dccf93955 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -481,6 +481,8 @@ struct tb_nhi { * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) * @flags: Ring specific flags + * @e2e_tx_hop: Transmit HopID when E2E is enabled. Only applicable to + * RX ring. For TX ring this should be set to %0. * @sof_mask: Bit mask used to detect start of frame PDF * @eof_mask: Bit mask used to detect end of frame PDF * @start_poll: Called when ring interrupt is triggered to start @@ -504,6 +506,7 @@ struct tb_ring { int irq; u8 vector; unsigned int flags; + int e2e_tx_hop; u16 sof_mask; u16 eof_mask; void (*start_poll)(void *data); @@ -514,6 +517,8 @@ struct tb_ring { #define RING_FLAG_NO_SUSPEND BIT(0) /* Configure the ring to be in frame mode */ #define RING_FLAG_FRAME BIT(1) +/* Enable end-to-end flow control */ +#define RING_FLAG_E2E BIT(2) struct ring_frame; typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); @@ -562,7 +567,8 @@ struct ring_frame { struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags); struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask, + unsigned int flags, int e2e_tx_hop, + u16 sof_mask, u16 eof_mask, void (*start_poll)(void *), void *poll_data); void tb_ring_start(struct tb_ring *ring); void tb_ring_stop(struct tb_ring *ring); -- cgit v1.2.3 From 029b42d8519cef70c4fb5fcaccd08f1053ed2bf0 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 27 Oct 2020 10:57:23 +0100 Subject: spi: introduce SPI_MODE_X_MASK macro Provide a macro to filter all SPI_MODE_0,1,2,3 mode in one run. The latest SPI framework will parse the devicetree in following call sequence: of_register_spi_device() -> of_spi_parse_dt() So, driver do not need to pars the devicetree and will get prepared flags in the probe. On one hand it is good far most drivers. On other hand some drivers need to filter flags provide by SPI framework and apply know to work flags. This drivers may use SPI_MODE_X_MASK to filter MODE flags and set own, known flags: spi->flags &= ~SPI_MODE_X_MASK; spi->flags |= SPI_MODE_0; Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20201027095724.18654-2-o.rempel@pengutronix.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 99380c0825db..8097f27702f3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -171,6 +171,7 @@ struct spi_device { #define SPI_MODE_1 (0|SPI_CPHA) #define SPI_MODE_2 (SPI_CPOL|0) #define SPI_MODE_3 (SPI_CPOL|SPI_CPHA) +#define SPI_MODE_X_MASK (SPI_CPOL|SPI_CPHA) #define SPI_CS_HIGH 0x04 /* chipselect active high? */ #define SPI_LSB_FIRST 0x08 /* per-word bits-on-wire */ #define SPI_3WIRE 0x10 /* SI/SO signals shared */ -- cgit v1.2.3 From 476b485be03c37212fcf10d85510aae5d34316ef Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 10 Nov 2020 13:41:17 -0800 Subject: dma-buf: Document that dma-buf size is fixed The fact that the size of dma-buf is invariant over the lifetime of the buffer is mentioned in the comment of 'dma_buf_ops.mmap', but is not documented at where the info is defined. Add the missing documentation. Signed-off-by: Jianxin Xiong Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1605044477-51833-7-git-send-email-jianxin.xiong@intel.com --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 03875eaed51a..cf72699cb2bc 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -273,7 +273,7 @@ struct dma_buf_ops { /** * struct dma_buf - shared buffer object - * @size: size of the buffer + * @size: size of the buffer; invariant over the lifetime of the buffer. * @file: file pointer used for sharing buffers across, and for refcounting. * @attachments: list of dma_buf_attachment that denotes all devices attached, * protected by dma_resv lock. @@ -405,7 +405,7 @@ struct dma_buf_attachment { * @exp_name: name of the exporter - useful for debugging. * @owner: pointer to exporter module - used for refcounting kernel module * @ops: Attach allocator-defined dma buf ops to the new buffer - * @size: Size of the buffer + * @size: Size of the buffer - invariant over the lifetime of the buffer * @flags: mode flags for the file * @resv: reservation-object, NULL to allocate default one * @priv: Attach private data of allocator to this buffer -- cgit v1.2.3 From c0c5a60f0f1311bcf08bbe735122096d6326fb5b Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 7 Nov 2020 20:35:13 +0100 Subject: net: evaluate net.ipvX.conf.all.ignore_routes_with_linkdown Introduced in 0eeb075fad73, the "ignore_routes_with_linkdown" sysctl ignores a route whose interface is down. It is provided as a per-interface sysctl. However, while a "all" variant is exposed, it was a noop since it was never evaluated. We use the usual "or" logic for this kind of sysctls. Tested with: ip link add type veth # veth0 + veth1 ip link add type veth # veth1 + veth2 ip link set up dev veth0 ip link set up dev veth1 # link-status paired with veth0 ip link set up dev veth2 ip link set up dev veth3 # link-status paired with veth2 # First available path ip -4 addr add 203.0.113.${uts#H}/24 dev veth0 ip -6 addr add 2001:db8:1::${uts#H}/64 dev veth0 # Second available path ip -4 addr add 192.0.2.${uts#H}/24 dev veth2 ip -6 addr add 2001:db8:2::${uts#H}/64 dev veth2 # More specific route through first path ip -4 route add 198.51.100.0/25 via 203.0.113.254 # via veth0 ip -6 route add 2001:db8:3::/56 via 2001:db8:1::ff # via veth0 # Less specific route through second path ip -4 route add 198.51.100.0/24 via 192.0.2.254 # via veth2 ip -6 route add 2001:db8:3::/48 via 2001:db8:2::ff # via veth2 # H1: enable on "all" # H2: enable on "veth0" for v in ipv4 ipv6; do case $uts in H1) sysctl -qw net.${v}.conf.all.ignore_routes_with_linkdown=1 ;; H2) sysctl -qw net.${v}.conf.veth0.ignore_routes_with_linkdown=1 ;; esac done set -xe # When veth0 is up, best route is through veth0 ip -o route get 198.51.100.1 | grep -Fw veth0 ip -o route get 2001:db8:3::1 | grep -Fw veth0 # When veth0 is down, best route should be through veth2 on H1/H2, # but on veth0 on H2 ip link set down dev veth1 # down veth0 ip route show [ $uts != H3 ] || ip -o route get 198.51.100.1 | grep -Fw veth0 [ $uts != H3 ] || ip -o route get 2001:db8:3::1 | grep -Fw veth0 [ $uts = H3 ] || ip -o route get 198.51.100.1 | grep -Fw veth2 [ $uts = H3 ] || ip -o route get 2001:db8:3::1 | grep -Fw veth2 Without this patch, the two last lines would fail on H1 (the one using the "all" sysctl). With the patch, everything succeeds as expected. Also document the sysctl in `ip-sysctl.rst`. Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down") Signed-off-by: Vincent Bernat Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3515ca64e638..3bbcddd22df8 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -126,7 +126,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ - IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) -- cgit v1.2.3 From 1af5318c00a8acc33a90537af49b3f23f72a2c4b Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 7 Nov 2020 20:35:14 +0100 Subject: net: evaluate net.ipv4.conf.all.proxy_arp_pvlan Introduced in 65324144b50b, the "proxy_arp_vlan" sysctl is a per-interface sysctl to tune proxy ARP support for private VLANs. While the "all" variant is exposed, it was a noop and never evaluated. We use the usual "or" logic for this kind of sysctls. Fixes: 65324144b50b ("net: RFC3069, private VLAN proxy arp support") Signed-off-by: Vincent Bernat Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3bbcddd22df8..53aa0343bf69 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -105,7 +105,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) -#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) +#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ -- cgit v1.2.3 From 1f78ae99790812481b7944cf40008aed5fd2ef18 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 10 Nov 2020 18:48:40 -0300 Subject: serial: imx: Remove unused platform data support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 5.10-rc1 i.MX is a devicetree-only platform and the existing platform data support in this driver was only useful for old non-devicetree platforms. Get rid of the platform data support since it is no longer used. Reviewed-by: Fugang Duan Acked-by: Uwe Kleine-König Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20201110214840.16768-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/serial-imx.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/platform_data/serial-imx.h (limited to 'include/linux') diff --git a/include/linux/platform_data/serial-imx.h b/include/linux/platform_data/serial-imx.h deleted file mode 100644 index 0844b21372c7..000000000000 --- a/include/linux/platform_data/serial-imx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2008 by Sascha Hauer - */ - -#ifndef ASMARM_ARCH_UART_H -#define ASMARM_ARCH_UART_H - -#define IMXUART_HAVE_RTSCTS (1<<0) - -struct imxuart_platform_data { - unsigned int flags; -}; - -#endif -- cgit v1.2.3 From 5e844cc37a5cbaa460e68f9a989d321d63088a89 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Nov 2020 20:07:10 +0100 Subject: spi: Introduce device-managed SPI controller allocation SPI driver probing currently comprises two steps, whereas removal comprises only one step: spi_alloc_master() spi_register_controller() spi_unregister_controller() That's because spi_unregister_controller() calls device_unregister() instead of device_del(), thereby releasing the reference on the spi_controller which was obtained by spi_alloc_master(). An SPI driver's private data is contained in the same memory allocation as the spi_controller struct. Thus, once spi_unregister_controller() has been called, the private data is inaccessible. But some drivers need to access it after spi_unregister_controller() to perform further teardown steps. Introduce devm_spi_alloc_master() and devm_spi_alloc_slave(), which release a reference on the spi_controller struct only after the driver has unbound, thereby keeping the memory allocation accessible. Change spi_unregister_controller() to not release a reference if the spi_controller was allocated by one of these new devm functions. The present commit is small enough to be backportable to stable. It allows fixing drivers which use the private data in their ->remove() hook after it's been freed. It also allows fixing drivers which neglect to release a reference on the spi_controller in the probe error path. Long-term, most SPI drivers shall be moved over to the devm functions introduced herein. The few that can't shall be changed in a treewide commit to explicitly release the last reference on the controller. That commit shall amend spi_unregister_controller() to no longer release a reference, thereby completing the migration. As a result, the behaviour will be less surprising and more consistent with subsystems such as IIO, which also includes the private data in the allocation of the generic iio_dev struct, but calls device_del() in iio_device_unregister(). Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/272bae2ef08abd21388c98e23729886663d19192.1605121038.git.lukas@wunner.de Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 99380c0825db..b390fdac1587 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -734,6 +734,25 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } +struct spi_controller *__devm_spi_alloc_controller(struct device *dev, + unsigned int size, + bool slave); + +static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, + unsigned int size) +{ + return __devm_spi_alloc_controller(dev, size, false); +} + +static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __devm_spi_alloc_controller(dev, size, true); +} + extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); -- cgit v1.2.3 From cd2c40ff90b0e385c18f881ab5e17f7137864223 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Thu, 29 Oct 2020 15:27:36 -0700 Subject: platform/chrome: cros_ec: Import Type C host commands Import the EC_CMD_TYPEC_STATUS and EC_CMD_TYPEC_DISCOVERY Chrome OS EC host commands from the EC code base [1]. These commands can be used by the application processor to query Power Delivery (PD) discovery information concerning connected Type C peripherals. Also add the EC_FEATURE_TYPEC_CMD feature flag, which is used to determine whether these commands are supported by the EC. [1]: https://chromium.googlesource.com/chromiumos/platform/ec/+/refs/heads/master/include/ec_commands.h Signed-off-by: Prashant Malani Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201029222738.482366-5-pmalani@chromium.org --- include/linux/platform_data/cros_ec_commands.h | 155 +++++++++++++++++++++++++ 1 file changed, 155 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 1fcfe9e63cb9..7f54fdcdd8cb 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1284,6 +1284,8 @@ enum ec_feature_code { EC_FEATURE_SCP = 39, /* The MCU is an Integrated Sensor Hub */ EC_FEATURE_ISH = 40, + /* New TCPMv2 TYPEC_ prefaced commands supported */ + EC_FEATURE_TYPEC_CMD = 41, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) @@ -5528,6 +5530,159 @@ struct ec_response_regulator_get_voltage { uint32_t voltage_mv; } __ec_align4; +/* + * Gather all discovery information for the given port and partner type. + * + * Note that if discovery has not yet completed, only the currently completed + * responses will be filled in. If the discovery data structures are changed + * in the process of the command running, BUSY will be returned. + * + * VDO field sizes are set to the maximum possible number of VDOs a VDM may + * contain, while the number of SVIDs here is selected to fit within the PROTO2 + * maximum parameter size. + */ +#define EC_CMD_TYPEC_DISCOVERY 0x0131 + +enum typec_partner_type { + TYPEC_PARTNER_SOP = 0, + TYPEC_PARTNER_SOP_PRIME = 1, +}; + +struct ec_params_typec_discovery { + uint8_t port; + uint8_t partner_type; /* enum typec_partner_type */ +} __ec_align1; + +struct svid_mode_info { + uint16_t svid; + uint16_t mode_count; /* Number of modes partner sent */ + uint32_t mode_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ +}; + +struct ec_response_typec_discovery { + uint8_t identity_count; /* Number of identity VDOs partner sent */ + uint8_t svid_count; /* Number of SVIDs partner sent */ + uint16_t reserved; + uint32_t discovery_vdo[6]; /* Max VDOs allowed after VDM header is 6 */ + struct svid_mode_info svids[0]; +} __ec_align1; + +/* + * Gather all status information for a port. + * + * Note: this covers many of the return fields from the deprecated + * EC_CMD_USB_PD_CONTROL command, except those that are redundant with the + * discovery data. The "enum pd_cc_states" is defined with the deprecated + * EC_CMD_USB_PD_CONTROL command. + * + * This also combines in the EC_CMD_USB_PD_MUX_INFO flags. + */ +#define EC_CMD_TYPEC_STATUS 0x0133 + +/* + * Power role. + * + * Note this is also used for PD header creation, and values align to those in + * the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.4 Port Power Role). + */ +enum pd_power_role { + PD_ROLE_SINK = 0, + PD_ROLE_SOURCE = 1 +}; + +/* + * Data role. + * + * Note this is also used for PD header creation, and the first two values + * align to those in the Power Delivery Specification Revision 3.0 (See + * 6.2.1.1.6 Port Data Role). + */ +enum pd_data_role { + PD_ROLE_UFP = 0, + PD_ROLE_DFP = 1, + PD_ROLE_DISCONNECTED = 2, +}; + +enum pd_vconn_role { + PD_ROLE_VCONN_OFF = 0, + PD_ROLE_VCONN_SRC = 1, +}; + +/* + * Note: BIT(0) may be used to determine whether the polarity is CC1 or CC2, + * regardless of whether a debug accessory is connected. + */ +enum tcpc_cc_polarity { + /* + * _CCx: is used to indicate the polarity while not connected to + * a Debug Accessory. Only one CC line will assert a resistor and + * the other will be open. + */ + POLARITY_CC1 = 0, + POLARITY_CC2 = 1, + + /* + * _CCx_DTS is used to indicate the polarity while connected to a + * SRC Debug Accessory. Assert resistors on both lines. + */ + POLARITY_CC1_DTS = 2, + POLARITY_CC2_DTS = 3, + + /* + * The current TCPC code relies on these specific POLARITY values. + * Adding in a check to verify if the list grows for any reason + * that this will give a hint that other places need to be + * adjusted. + */ + POLARITY_COUNT +}; + +#define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) +#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) + +struct ec_params_typec_status { + uint8_t port; +} __ec_align1; + +struct ec_response_typec_status { + uint8_t pd_enabled; /* PD communication enabled - bool */ + uint8_t dev_connected; /* Device connected - bool */ + uint8_t sop_connected; /* Device is SOP PD capable - bool */ + uint8_t source_cap_count; /* Number of Source Cap PDOs */ + + uint8_t power_role; /* enum pd_power_role */ + uint8_t data_role; /* enum pd_data_role */ + uint8_t vconn_role; /* enum pd_vconn_role */ + uint8_t sink_cap_count; /* Number of Sink Cap PDOs */ + + uint8_t polarity; /* enum tcpc_cc_polarity */ + uint8_t cc_state; /* enum pd_cc_states */ + uint8_t dp_pin; /* DP pin mode (MODE_DP_IN_[A-E]) */ + uint8_t mux_state; /* USB_PD_MUX* - encoded mux state */ + + char tc_state[32]; /* TC state name */ + + uint32_t events; /* PD_STATUS_EVENT bitmask */ + + /* + * BCD PD revisions for partners + * + * The format has the PD major reversion in the upper nibble, and PD + * minor version in the next nibble. Following two nibbles are + * currently 0. + * ex. PD 3.2 would map to 0x3200 + * + * PD major/minor will be 0 if no PD device is connected. + */ + uint16_t sop_revision; + uint16_t sop_prime_revision; + + uint32_t source_cap_pdos[7]; /* Max 7 PDOs can be present */ + + uint32_t sink_cap_pdos[7]; /* Max 7 PDOs can be present */ +} __ec_align1; + /*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ -- cgit v1.2.3 From 7e890c37c25c7cbca37ff0ab292873d8146e713b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Nov 2020 17:50:04 +0100 Subject: block: add a return value to set_capacity_revalidate_and_notify Return if the function ended up sending an uevent or not. Cc: stable@vger.kernel.org # v5.9 Signed-off-by: Christoph Hellwig Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 38f23d757013..03da3f603d30 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); /* drivers/char/random.c */ -- cgit v1.2.3 From af0c351cc34857ad7b254850b9392d99da46be9e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 10 Nov 2020 20:50:02 +0100 Subject: usbnet: switch to core handling of rx/tx byte/packet counters Use netdev->tstats instead of a member of usbnet for storing a pointer to the per-cpu counters. This allows us to use core functionality for statistics handling. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- include/linux/usb/usbnet.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 2e4f7721fc4e..1f6dfa977e7f 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -65,8 +65,6 @@ struct usbnet { struct usb_anchor deferred; struct tasklet_struct bh; - struct pcpu_sw_netstats __percpu *stats64; - struct work_struct kevent; unsigned long flags; # define EVENT_TX_HALT 0 @@ -285,7 +283,7 @@ extern int usbnet_status_start(struct usbnet *dev, gfp_t mem_flags); extern void usbnet_status_stop(struct usbnet *dev); extern void usbnet_update_max_qlen(struct usbnet *dev); -extern void usbnet_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats); + +#define usbnet_get_stats64 dev_get_tstats64 #endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From 323955a0498ccaa1263e369b91efd8f4310768b6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 10 Nov 2020 20:51:03 +0100 Subject: net: usb: switch to dev_get_tstats64 and remove usbnet_get_stats64 alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace usbnet_get_stats64() with new identical core function dev_get_tstats64() in all users and remove usbnet_get_stats64(). Signed-off-by: Heiner Kallweit Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- include/linux/usb/usbnet.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 1f6dfa977e7f..88a7673894d5 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -284,6 +284,4 @@ extern void usbnet_status_stop(struct usbnet *dev); extern void usbnet_update_max_qlen(struct usbnet *dev); -#define usbnet_get_stats64 dev_get_tstats64 - #endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From 6d94e741a8ff818e5518da8257f5ca0aaed1f269 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Nov 2020 19:12:11 -0800 Subject: bpf: Support for pointers beyond pkt_end. This patch adds the verifier support to recognize inlined branch conditions. The LLVM knows that the branch evaluates to the same value, but the verifier couldn't track it. Hence causing valid programs to be rejected. The potential LLVM workaround: https://reviews.llvm.org/D87428 can have undesired side effects, since LLVM doesn't know that skb->data/data_end are being compared. LLVM has to introduce extra boolean variable and use inline_asm trick to force easier for the verifier assembly. Instead teach the verifier to recognize that r1 = skb->data; r1 += 10; r2 = skb->data_end; if (r1 > r2) { here r1 points beyond packet_end and subsequent if (r1 > r2) // always evaluates to "true". } Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201111031213.25109-2-alexei.starovoitov@gmail.com --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e83ef6f6bf43..306869d4743b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -45,7 +45,7 @@ struct bpf_reg_state { enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ - u16 range; + int range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL -- cgit v1.2.3 From 8d0dd23c6c78d140ed2132f523592ddb4cea839f Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Thu, 12 Nov 2020 16:56:57 -0500 Subject: syscalls: Fix file comments for syscalls implemented in kernel/sys.c The relevant syscalls were previously moved from kernel/timer.c to kernel/sys.c, but the comments weren't updated to reflect this change. Fixing these comments messes up the alphabetical ordering of syscalls by filename. This could be fixed by merging the two groups of kernel/sys.c syscalls, but that would require reordering the syscalls and renumbering them to maintain the numerical order in unistd.h. Signed-off-by: Tal Zussman Link: https://lore.kernel.org/r/20201112215657.GA4539@charmander' Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f2..629870fbb2c9 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -744,7 +744,7 @@ asmlinkage long sys_settimeofday(struct __kernel_old_timeval __user *tv, asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p); -/* kernel/timer.c */ +/* kernel/sys.c */ asmlinkage long sys_getpid(void); asmlinkage long sys_getppid(void); asmlinkage long sys_getuid(void); -- cgit v1.2.3 From a609c58086e381c13bdad1ba97e6510a13d465e7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 12 Nov 2020 10:58:55 +0000 Subject: tty: serial: 8250: 8250_port: Move prototypes to shared location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/tty/serial/8250/8250_port.c:349:14: warning: no previous prototype for ‘au_serial_in’ [-Wmissing-prototypes] drivers/tty/serial/8250/8250_port.c:359:6: warning: no previous prototype for ‘au_serial_out’ [-Wmissing-prototypes] Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Mike Hudson Cc: linux-serial@vger.kernel.org Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20201112105857.2078977-3-lee.jones@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 2b70f736b091..9e655055112d 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -187,4 +187,9 @@ extern void serial8250_set_isa_configurator(void (*v) (int port, struct uart_port *up, u32 *capabilities)); +#ifdef CONFIG_SERIAL_8250_RT288X +unsigned int au_serial_in(struct uart_port *p, int offset); +void au_serial_out(struct uart_port *p, int offset, int value); +#endif + #endif -- cgit v1.2.3 From 423f16108c9d832bd96059d5c882c8ef6d76eb96 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 13 Nov 2020 00:59:29 +0000 Subject: bpf: Augment the set of sleepable LSM hooks Update the set of sleepable hooks with the ones that do not trigger a warning with might_fault() when exercised with the correct kernel config options enabled, i.e. DEBUG_ATOMIC_SLEEP=y LOCKDEP=y PROVE_LOCKING=y This means that a sleepable LSM eBPF program can be attached to these LSM hooks. A new helper method bpf_lsm_is_sleepable_hook is added and the set is maintained locally in bpf_lsm.c Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201113005930.541956-2-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 73226181b744..0d1c33ace398 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -27,6 +27,8 @@ extern struct lsm_blob_sizes bpf_lsm_blob_sizes; int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); +bool bpf_lsm_is_sleepable_hook(u32 btf_id); + static inline struct bpf_storage_blob *bpf_inode( const struct inode *inode) { @@ -54,6 +56,11 @@ void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ +static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) +{ + return false; +} + static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { -- cgit v1.2.3 From d19ad0775dcd64b49eecf4fa79c17959ebfbd26b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:42:17 -0400 Subject: ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs In preparation to have arguments of a function passed to callbacks attached to functions as default, change the default callback prototype to receive a struct ftrace_regs as the forth parameter instead of a pt_regs. For callbacks that set the FL_SAVE_REGS flag in their ftrace_ops flags, they will now need to get the pt_regs via a ftrace_get_regs() helper call. If this is called by a callback that their ftrace_ops did not have a FL_SAVE_REGS flag set, it that helper function will return NULL. This will allow the ftrace_regs to hold enough just to get the parameters and stack pointer, but without the worry that callbacks may have a pt_regs that is not completely filled. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 16 ++++++++++++++-- include/linux/kprobes.h | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8dde9c17aaa5..24e1fa52337d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -90,8 +90,20 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) +{ + if (!fregs) + return NULL; + + return &fregs->regs; +} + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); @@ -259,7 +271,7 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 629abaf25681..be73350955e4 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -345,7 +345,7 @@ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs); + struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif -- cgit v1.2.3 From 02a474ca266a47ea8f4d5a11f4ffa120f83730ad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 27 Oct 2020 10:55:55 -0400 Subject: ftrace/x86: Allow for arguments to be passed in to ftrace_regs by default Currently, the only way to get access to the registers of a function via a ftrace callback is to set the "FL_SAVE_REGS" bit in the ftrace_ops. But as this saves all regs as if a breakpoint were to trigger (for use with kprobes), it is expensive. The regs are already saved on the stack for the default ftrace callbacks, as that is required otherwise a function being traced will get the wrong arguments and possibly crash. And on x86, the arguments are already stored where they would be on a pt_regs structure to use that code for both the regs version of a callback, it makes sense to pass that information always to all functions. If an architecture does this (as x86_64 now does), it is to set HAVE_DYNAMIC_FTRACE_WITH_ARGS, and this will let the generic code that it could have access to arguments without having to set the flags. This also includes having the stack pointer being saved, which could be used for accessing arguments on the stack, as well as having the function graph tracer not require its own trampoline! Acked-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 24e1fa52337d..588ea7023a7a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -90,16 +90,21 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + struct ftrace_regs { struct pt_regs regs; }; +#define arch_ftrace_get_regs(fregs) (&(fregs)->regs) + +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) { if (!fregs) return NULL; - return &fregs->regs; + return arch_ftrace_get_regs(fregs); } typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, -- cgit v1.2.3 From 2860cd8a235375df3c8ec8039d9fe5eb2f658b86 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:15:27 -0400 Subject: livepatch: Use the default ftrace_ops instead of REGS when ARGS is available When CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS is available, the ftrace call will be able to set the ip of the calling function. This will improve the performance of live kernel patching where it does not need all the regs to be stored just to change the instruction pointer. If all archs that support live kernel patching also support HAVE_DYNAMIC_FTRACE_WITH_ARGS, then the architecture specific function klp_arch_set_pc() could be made generic. It is possible that an arch can support HAVE_DYNAMIC_FTRACE_WITH_ARGS but not HAVE_DYNAMIC_FTRACE_WITH_REGS and then have access to live patching. Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: live-patching@vger.kernel.org Acked-by: Peter Zijlstra (Intel) Acked-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 588ea7023a7a..9a8ce28e4485 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -97,6 +97,13 @@ struct ftrace_regs { }; #define arch_ftrace_get_regs(fregs) (&(fregs)->regs) +/* + * ftrace_instruction_pointer_set() is to be defined by the architecture + * if to allow setting of the instruction pointer from the ftrace_regs + * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports + * live kernel patching. + */ +#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) -- cgit v1.2.3 From e7018751d2e603381ae6028ba4dd21ec45ce38bb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 13 Nov 2020 14:12:31 -0300 Subject: usb: host: ehci-mxc: Remove the driver The ehci-mxc driver was only used by i.MX non-DT platforms. Since 5.10-rc1, i.MX has been converted to a DT-only platform and all board files are gone. Remove the ehci-mxc driver as there are no more users at all. Acked-by: Alan Stern Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20201113171231.2205-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/usb-ehci-mxc.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/platform_data/usb-ehci-mxc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-ehci-mxc.h b/include/linux/platform_data/usb-ehci-mxc.h deleted file mode 100644 index ad9794d09bc8..000000000000 --- a/include/linux/platform_data/usb-ehci-mxc.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __INCLUDE_ASM_ARCH_MXC_EHCI_H -#define __INCLUDE_ASM_ARCH_MXC_EHCI_H - -struct mxc_usbh_platform_data { - int (*init)(struct platform_device *pdev); - int (*exit)(struct platform_device *pdev); - - unsigned int portsc; - struct usb_phy *otg; -}; - -#endif /* __INCLUDE_ASM_ARCH_MXC_EHCI_H */ - -- cgit v1.2.3 From 56c62080d5b57dac2c2cdd4a83571450e38ca763 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 13 Nov 2020 22:27:04 +0100 Subject: usb: hcd.h: Remove RUN_CONTEXT The last user of RUN_CONTEXT was removed in commit 97c17beb3b668 ("[PATCH] ehci-hcd (1/2): portability (2.4), tasklet,") in the history.git repo. There are no users of RUN_CONTEXT, remove it. Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20201113212704.2243807-1-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 3dbb42c637c1..96281cd50ff6 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -734,10 +734,6 @@ static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb, /* random stuff */ -#define RUN_CONTEXT (in_irq() ? "in_irq" \ - : (in_interrupt() ? "in_interrupt" : "can sleep")) - - /* This rwsem is for use only by the hub driver and ehci-hcd. * Nobody else should touch it. */ -- cgit v1.2.3 From 8dedcc3eee3aceb37832176f0a1b03d5687acda3 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 24 Sep 2020 11:41:55 +0300 Subject: iio: core: centralize ioctl() calls to the main chardev The aim of this is to improve a bit the organization of ioctl() calls in IIO core. Currently the chardev is split across IIO core sub-modules/files. The main chardev has to be able to handle ioctl() calls, and if we need to add buffer ioctl() calls, this would complicate things. The 'industrialio-core.c' file will provide a 'iio_device_ioctl()' which will iterate over a list of ioctls registered with the IIO device. These can be event ioctl() or buffer ioctl() calls, or something else. Each ioctl() handler will have to return a IIO_IOCTL_UNHANDLED code (which is positive 1), if the ioctl() did not handle the call in any. This eliminates any potential ambiguities about negative error codes, which should fail the call altogether. If any ioctl() returns 0, it was considered that it was serviced successfully and the loop will exit. This change also moves the handling of the IIO_GET_EVENT_FD_IOCTL command inside 'industrialio-event.c', where this is better suited. This patch is a combination of 2 other patches from an older series: Patch 1: iio: core: add simple centralized mechanism for ioctl() handlers Link: https://lore.kernel.org/linux-iio/20200427131100.50845-6-alexandru.ardelean@analog.com/ Patch 2: iio: core: use new common ioctl() mechanism Link: https://lore.kernel.org/linux-iio/20200427131100.50845-7-alexandru.ardelean@analog.com/ Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20200924084155.99406-1-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index f2e94196d31f..07c5a8e52ca8 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -11,6 +11,7 @@ * @channel_attr_list: keep track of automatically created channel * attributes * @chan_attr_group: group for all attrs in base directory + * @ioctl_handlers: ioctl handlers registered with the core handler * @debugfs_dentry: device specific debugfs dentry * @cached_reg_addr: cached register address for debugfs reads * @read_buf: read buffer to be used for the initial reg read @@ -22,6 +23,7 @@ struct iio_dev_opaque { struct list_head buffer_list; struct list_head channel_attr_list; struct attribute_group chan_attr_group; + struct list_head ioctl_handlers; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; unsigned cached_reg_addr; -- cgit v1.2.3 From 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 13 Nov 2020 22:51:59 -0800 Subject: compiler.h: fix barrier_data() on clang Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") neglected to copy barrier_data() from compiler-gcc.h into compiler-clang.h. The definition in compiler-gcc.h was really to work around clang's more aggressive optimization, so this broke barrier_data() on clang, and consequently memzero_explicit() as well. For example, this results in at least the memzero_explicit() call in lib/crypto/sha256.c:sha256_transform() being optimized away by clang. Fix this by moving the definition of barrier_data() into compiler.h. Also move the gcc/clang definition of barrier() into compiler.h, __memory_barrier() is icc-specific (and barrier() is already defined using it in compiler-intel.h) and doesn't belong in compiler.h. [rdunlap@infradead.org: fix ALPHA builds when SMP is not enabled] Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arvind Sankar Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 6 ------ include/linux/compiler-gcc.h | 19 ------------------- include/linux/compiler.h | 18 ++++++++++++++++-- 3 files changed, 16 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 230604e7f057..dd7233c48bf3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -60,12 +60,6 @@ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif -/* The following are for compatibility with GCC, from compiler-gcc.h, - * and may be redefined here because they should not be shared with other - * compilers, like ICC. - */ -#define barrier() __asm__ __volatile__("" : : : "memory") - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5deb37024574..74c6c0486eed 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,25 +15,6 @@ # error Sorry, your version of GCC is too old - please use 4.9 or newer. #endif -/* Optimization barrier */ - -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") -/* - * This version is i.e. to prevent dead stores elimination on @ptr - * where gcc and llvm may behave differently when otherwise using - * normal barrier(): while gcc behavior gets along with a normal - * barrier(), llvm needs an explicit input variable to be assumed - * clobbered. The issue is as follows: while the inline asm might - * access any memory it wants, the compiler could have fit all of - * @ptr into memory registers instead, and since @ptr never escaped - * from that, it proved that the inline asm wasn't touching any of - * it. This version works well with both compilers, i.e. we're telling - * the compiler that the inline asm absolutely may see the contents - * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 - */ -#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") - /* * This macro obfuscates arithmetic on a variable address so that gcc * shouldn't recognize the original var, and make assumptions about it. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e512f5505dad..b8fe0c23cfff 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Optimization barrier */ #ifndef barrier -# define barrier() __memory_barrier() +/* The "volatile" is due to gcc bugs */ +# define barrier() __asm__ __volatile__("": : :"memory") #endif #ifndef barrier_data -# define barrier_data(ptr) barrier() +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proved that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") #endif /* workaround for GCC PR82365 if needed */ -- cgit v1.2.3 From 8b21ca0218d29cc6bb7028125c7e5a10dfb4730c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 13 Nov 2020 22:52:13 -0800 Subject: mm: memcontrol: fix missing wakeup polling thread When we poll the swap.events, we can miss being woken up when the swap event occurs. Because we didn't notify. Fixes: f3a53a3a1e5b ("mm, memcontrol: implement memory.swap.events") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Roman Gushchin Cc: Michal Hocko Cc: Yafang Shao Cc: Chris Down Cc: Tejun Heo Link: https://lkml.kernel.org/r/20201105161936.98312-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..a80c59af2c60 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,12 +900,19 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { + bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || + event == MEMCG_SWAP_FAIL; + atomic_long_inc(&memcg->memory_events_local[event]); - cgroup_file_notify(&memcg->events_local_file); + if (!swap_event) + cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - cgroup_file_notify(&memcg->events_file); + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; -- cgit v1.2.3 From 30d6f8c15d2cd877c1f3d47d8a1064649ebe58e2 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:21:46 +0200 Subject: clk: add api to get clk consumer from clk_hw clk_register() is deprecated. Using 'clk' member of struct clk_hw is discouraged. With this constraint, it is difficult for driver to register clocks using the clk_hw API and then use the clock with the consumer API This adds a simple helper, clk_hw_get_clk(), to get a struct clk from a struct clk_hw. Like other clk_get() variant, each call to this helper must be balanced with a call to clk_put(). To make life easier on the consumers, a memory managed version is provided as well. Cc: Martin Blumenstingl Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021162147.563655-3-jbrunet@baylibre.com Tested-by: Kevin Hilman [sboyd@kernel.org: Fix kernel-doc] Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 03a5de5f99f4..86b707520ec0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1088,6 +1088,11 @@ static inline struct clk_hw *__clk_get_hw(struct clk *clk) return (struct clk_hw *)clk; } #endif + +struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id); +struct clk *devm_clk_hw_get_clk(struct device *dev, struct clk_hw *hw, + const char *con_id); + unsigned int clk_hw_get_num_parents(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent_by_index(const struct clk_hw *hw, -- cgit v1.2.3 From 6d30d50d037dfa092f9d5d1fffa348ab4abb7163 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:38:46 +0200 Subject: clk: add devm variant of clk_notifier_register Add a memory managed variant of clk_notifier_register() to make life easier on clock consumers using notifiers Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021163847.595189-2-jbrunet@baylibre.com Signed-off-by: Stephen Boyd --- include/linux/clk.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 7fd6a1febcf4..f53afdf8198b 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -109,6 +109,16 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb); */ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); +/** + * devm_clk_notifier_register - register a managed rate-change notifier callback + * @dev: device for clock "consumer" + * @clk: clock whose rate we are interested in + * @nb: notifier block with callback function pointer + * + * Returns 0 on success, -EERROR otherwise + */ +int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb); + /** * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion) * for a clock source. -- cgit v1.2.3 From e6fb7aee486c7fbd4d94f4894feaa6f0424c1740 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 21 Oct 2020 18:38:47 +0200 Subject: clk: meson: g12: use devm variant to register notifiers Until now, nothing was done to unregister the dvfs clock notifiers of the Amlogic g12 SoC family. This is not great but this driver was not really expected to be unloaded. With the ongoing effort to build everything as module for this platform, this needs to be cleanly handled. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201021163847.595189-3-jbrunet@baylibre.com Signed-off-by: Stephen Boyd --- include/linux/clk.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index f53afdf8198b..4ac766dc3daf 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -117,7 +117,8 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); * * Returns 0 on success, -EERROR otherwise */ -int devm_clk_notifier_register(struct device *dev, struct clk *clk, struct notifier_block *nb); +int devm_clk_notifier_register(struct device *dev, struct clk *clk, + struct notifier_block *nb); /** * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion) @@ -196,6 +197,13 @@ static inline int clk_notifier_unregister(struct clk *clk, return -ENOTSUPP; } +static inline int devm_clk_notifier_register(struct device *dev, + struct clk *clk, + struct notifier_block *nb) +{ + return -ENOTSUPP; +} + static inline long clk_get_accuracy(struct clk *clk) { return -ENOTSUPP; -- cgit v1.2.3 From f296dcd629aa412a80a53215e46087f53af87f08 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Nov 2020 22:01:45 +0100 Subject: genirq: Remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ Commit bb9d812643d8 ("arch: remove tile port") removed the last user of this cruft two years ago... Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87eekvac06.fsf@nanos.tec.linutronix.de --- include/linux/irq.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c54365309e97..79ce314a603b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -954,21 +954,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ -unsigned int irq_alloc_hwirqs(int cnt, int node); -static inline unsigned int irq_alloc_hwirq(int node) -{ - return irq_alloc_hwirqs(1, node); -} -void irq_free_hwirqs(unsigned int from, int cnt); -static inline void irq_free_hwirq(unsigned int irq) -{ - return irq_free_hwirqs(irq, 1); -} -int arch_setup_hwirq(unsigned int irq, int node); -void arch_teardown_hwirq(unsigned int irq); -#endif - #ifdef CONFIG_GENERIC_IRQ_LEGACY void irq_init_desc(unsigned int irq); #endif -- cgit v1.2.3 From e906a546bd8653ed2e7a14cb300fd17952d7f862 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Nov 2020 23:36:28 +0100 Subject: genirq/irqdomain: Make irq_domain_disassociate() static No users outside of the core code. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87a6vja7mb.fsf@nanos.tec.linutronix.de --- include/linux/irqdomain.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 77bf7d84c673..5701a8b01726 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -387,8 +387,6 @@ extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, extern void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -extern void irq_domain_disassociate(struct irq_domain *domain, - unsigned int irq); extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); -- cgit v1.2.3 From c4d51a52c67a1e3a0fa3006e5ec21cdc07649cd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 14:39:43 +0000 Subject: sched/wait: Add add_wait_queue_priority() This allows an exclusive wait_queue_entry to be added at the head of the queue, instead of the tail as normal. Thus, it gets to consume events first without allowing non-exclusive waiters to be woken at all. The (first) intended use is for KVM IRQFD, which currently has inconsistent behaviour depending on whether posted interrupts are available or not. If they are, KVM will bypass the eventfd completely and deliver interrupts directly to the appropriate vCPU. If not, events are delivered through the eventfd and userspace will receive them when polling on the eventfd. By using add_wait_queue_priority(), KVM will be able to consistently consume events within the kernel without accidentally exposing them to userspace when they're supposed to be bypassed. This, in turn, means that userspace doesn't have to jump through hoops to avoid listening on the erroneously noisy eventfd and injecting duplicate interrupts. Signed-off-by: David Woodhouse Message-Id: <20201027143944.648769-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/wait.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 27fb99cfeb02..fe10e8570a52 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -22,6 +22,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int #define WQ_FLAG_BOOKMARK 0x04 #define WQ_FLAG_CUSTOM 0x08 #define WQ_FLAG_DONE 0x10 +#define WQ_FLAG_PRIORITY 0x20 /* * A single wait-queue entry structure: @@ -164,11 +165,20 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->entry, &wq_head->head); + struct list_head *head = &wq_head->head; + struct wait_queue_entry *wq; + + list_for_each_entry(wq, &wq_head->head, entry) { + if (!(wq->flags & WQ_FLAG_PRIORITY)) + break; + head = &wq->entry; + } + list_add(&wq_entry->entry, head); } /* -- cgit v1.2.3 From 28f1326710555bbe666f64452d08f2d7dd657cae Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2020 13:55:21 +0000 Subject: eventfd: Export eventfd_ctx_do_read() Where events are consumed in the kernel, for example by KVM's irqfd_wakeup() and VFIO's virqfd_wakeup(), they currently lack a mechanism to drain the eventfd's counter. Since the wait queue is already locked while the wakeup functions are invoked, all they really need to do is call eventfd_ctx_do_read(). Add a check for the lock, and export it for them. Signed-off-by: David Woodhouse Message-Id: <20201027135523.646811-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/linux/eventfd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index dc4fd8a6644d..fa0a524baed0 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -41,6 +41,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); +void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); DECLARE_PER_CPU(int, eventfd_wake_count); @@ -82,6 +83,11 @@ static inline bool eventfd_signal_count(void) return false; } +static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + +} + #endif #endif /* _LINUX_EVENTFD_H */ -- cgit v1.2.3 From 2f5414423ef577e9e8bdb227f32d0abdd34e4274 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Nov 2020 05:25:09 -0500 Subject: KVM: remove kvm_clear_guest_page kvm_clear_guest_page is not used anymore after "KVM: X86: Don't track dirty for KVM_SET_[TSS_ADDR|IDENTITY_MAP_ADDR]", except from kvm_clear_guest. We can just inline it in its sole user. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7f2e2a09ebbd..66a4324f329d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -792,7 +792,6 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, offset_in_page(__gpa), v); \ }) -int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); -- cgit v1.2.3 From 28bd726aa404c0da8fd6852fe69bb4538a103b71 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:20:34 -0400 Subject: KVM: Pass in kvm pointer into mark_page_dirty_in_slot() The context will be needed to implement the kvm dirty ring. Signed-off-by: Peter Xu Message-Id: <20201001012044.5151-5-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 66a4324f329d..ca7c1459a8e3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -797,7 +797,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From fb04a1eddb1a65b6588a021bdc132270d5ae48bb Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:22 -0400 Subject: KVM: X86: Implement ring-based dirty memory tracking This patch is heavily based on previous work from Lei Cao and Paolo Bonzini . [1] KVM currently uses large bitmaps to track dirty memory. These bitmaps are copied to userspace when userspace queries KVM for its dirty page information. The use of bitmaps is mostly sufficient for live migration, as large parts of memory are be dirtied from one log-dirty pass to another. However, in a checkpointing system, the number of dirty pages is small and in fact it is often bounded---the VM is paused when it has dirtied a pre-defined number of pages. Traversing a large, sparsely populated bitmap to find set bits is time-consuming, as is copying the bitmap to user-space. A similar issue will be there for live migration when the guest memory is huge while the page dirty procedure is trivial. In that case for each dirty sync we need to pull the whole dirty bitmap to userspace and analyse every bit even if it's mostly zeros. The preferred data structure for above scenarios is a dense list of guest frame numbers (GFN). This patch series stores the dirty list in kernel memory that can be memory mapped into userspace to allow speedy harvesting. This patch enables dirty ring for X86 only. However it should be easily extended to other archs as well. [1] https://patchwork.kernel.org/patch/10471409/ Signed-off-by: Lei Cao Signed-off-by: Paolo Bonzini Signed-off-by: Peter Xu Message-Id: <20201001012222.5767-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_dirty_ring.h | 103 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 13 ++++++ 2 files changed, 116 insertions(+) create mode 100644 include/linux/kvm_dirty_ring.h (limited to 'include/linux') diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h new file mode 100644 index 000000000000..120e5e90fa1d --- /dev/null +++ b/include/linux/kvm_dirty_ring.h @@ -0,0 +1,103 @@ +#ifndef KVM_DIRTY_RING_H +#define KVM_DIRTY_RING_H + +#include + +/** + * kvm_dirty_ring: KVM internal dirty ring structure + * + * @dirty_index: free running counter that points to the next slot in + * dirty_ring->dirty_gfns, where a new dirty page should go + * @reset_index: free running counter that points to the next dirty page + * in dirty_ring->dirty_gfns for which dirty trap needs to + * be reenabled + * @size: size of the compact list, dirty_ring->dirty_gfns + * @soft_limit: when the number of dirty pages in the list reaches this + * limit, vcpu that owns this ring should exit to userspace + * to allow userspace to harvest all the dirty pages + * @dirty_gfns: the array to keep the dirty gfns + * @index: index of this dirty ring + */ +struct kvm_dirty_ring { + u32 dirty_index; + u32 reset_index; + u32 size; + u32 soft_limit; + struct kvm_dirty_gfn *dirty_gfns; + int index; +}; + +#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0) +/* + * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should + * not be included as well, so define these nop functions for the arch. + */ +static inline u32 kvm_dirty_ring_get_rsvd_entries(void) +{ + return 0; +} + +static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, + int index, u32 size) +{ + return 0; +} + +static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm) +{ + return NULL; +} + +static inline int kvm_dirty_ring_reset(struct kvm *kvm, + struct kvm_dirty_ring *ring) +{ + return 0; +} + +static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, + u32 slot, u64 offset) +{ +} + +static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, + u32 offset) +{ + return NULL; +} + +static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) +{ +} + +static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) +{ + return true; +} + +#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +u32 kvm_dirty_ring_get_rsvd_entries(void); +int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); +struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm); + +/* + * called with kvm->slots_lock held, returns the number of + * processed pages. + */ +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); + +/* + * returns =0: successfully pushed + * <0: unable to push, need to wait + */ +void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset); + +/* for use in vm_operations_struct */ +struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset); + +void kvm_dirty_ring_free(struct kvm_dirty_ring *ring); +bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring); + +#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */ + +#endif /* KVM_DIRTY_RING_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca7c1459a8e3..864b156391c8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -34,6 +34,7 @@ #include #include +#include #ifndef KVM_MAX_VCPU_ID #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS @@ -319,6 +320,7 @@ struct kvm_vcpu { bool preempted; bool ready; struct kvm_vcpu_arch arch; + struct kvm_dirty_ring dirty_ring; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) @@ -505,6 +507,7 @@ struct kvm { struct srcu_struct irq_srcu; pid_t userspace_pid; unsigned int max_halt_poll_ns; + u32 dirty_ring_size; }; #define kvm_err(fmt, ...) \ @@ -1477,4 +1480,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +/* + * This defines how many reserved entries we want to keep before we + * kick the vcpu to the userspace to avoid dirty ring full. This + * value can be tuned to higher if e.g. PML is enabled on the host. + */ +#define KVM_DIRTY_RING_RSVD_ENTRIES 64 + +/* Max number of entries allowed for each kvm dirty ring */ +#define KVM_DIRTY_RING_MAX_ENTRIES 65536 + #endif -- cgit v1.2.3 From 044c59c409b7fd753707dc437890e94d2b0bd819 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Sep 2020 21:22:26 -0400 Subject: KVM: Don't allocate dirty bitmap if dirty ring is enabled Because kvm dirty rings and kvm dirty log is used in an exclusive way, Let's avoid creating the dirty_bitmap when kvm dirty ring is enabled. At the meantime, since the dirty_bitmap will be conditionally created now, we can't use it as a sign of "whether this memory slot enabled dirty tracking". Change users like that to check against the kvm memory slot flags. Note that there still can be chances where the kvm memory slot got its dirty_bitmap allocated, _if_ the memory slots are created before enabling of the dirty rings and at the same time with the dirty tracking capability enabled, they'll still with the dirty_bitmap. However it should not hurt much (e.g., the bitmaps will always be freed if they are there), and the real users normally won't trigger this because dirty bit tracking flag should in most cases only be applied to kvm slots only before migration starts, that should be far latter than kvm initializes (VM starts). Signed-off-by: Peter Xu Message-Id: <20201001012226.5868-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 864b156391c8..f3b1013fb22c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -351,6 +351,11 @@ struct kvm_memory_slot { u16 as_id; }; +static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot) +{ + return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; +} + static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) { return ALIGN(memslot->npages, BITS_PER_LONG) / 8; -- cgit v1.2.3 From 78a56e0494ad29feccd4c54c2b5682721f8cb988 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 4 Nov 2020 15:01:57 -0800 Subject: entry: Fix spelling/typo errors in irq entry code s/reguired/required/ s/Interupts/Interrupts/ s/quiescient/quiescent/ s/assemenbly/assembly/ Signed-off-by: Ira Weiny Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201104230157.3378023-1-ira.weiny@intel.com --- include/linux/entry-common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 1a128baf3628..aab549026ab8 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -415,7 +415,7 @@ void irqentry_exit_cond_resched(void); * @state: Return value from matching call to irqentry_enter() * * Depending on the return target (kernel/user) this runs the necessary - * preemption and work checks if possible and reguired and returns to + * preemption and work checks if possible and required and returns to * the caller with interrupts disabled and no further work pending. * * This is the last action before returning to the low level ASM code which @@ -438,7 +438,7 @@ irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); * @regs: Pointer to pt_regs (NMI entry regs) * @irq_state: Return value from matching call to irqentry_nmi_enter() * - * Last action before returning to the low level assmenbly code. + * Last action before returning to the low level assembly code. * * Counterpart to irqentry_nmi_enter(). */ -- cgit v1.2.3 From 23d89aa0c2192f2d4582198b381d8805492c7925 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Wed, 11 Nov 2020 13:11:18 +0200 Subject: firmware: imx-dsp: Export functions to request/free channels In order to save power, we only need to request a channel when the communication with the DSP active. For this we export the following functions: - imx_dsp_request_channel, gets a channel with a given index - imx_dsp_free_channel, frees a channel with a given index Notice that we still request channels at probe to support devices that do not have PM callbacks implemented. More explanations about why requesting a channel has an effect on power savings: - requesting an mailbox channel will call mailbox's startup function. - startup function calls pm_runtime_get_sync which increments device usage count and will keep the device active. Specifically, mailbox clock will be always ON when a mailbox channel is requested. Signed-off-by: Daniel Baluta Reviewed-by: Paul Olaru Signed-off-by: Shawn Guo --- include/linux/firmware/imx/dsp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/dsp.h b/include/linux/firmware/imx/dsp.h index 7562099c9e46..4f7895a3b73c 100644 --- a/include/linux/firmware/imx/dsp.h +++ b/include/linux/firmware/imx/dsp.h @@ -55,6 +55,9 @@ static inline void *imx_dsp_get_data(struct imx_dsp_ipc *ipc) int imx_dsp_ring_doorbell(struct imx_dsp_ipc *dsp, unsigned int chan_idx); +struct mbox_chan *imx_dsp_request_channel(struct imx_dsp_ipc *ipc, int idx); +void imx_dsp_free_channel(struct imx_dsp_ipc *ipc, int idx); + #else static inline int imx_dsp_ring_doorbell(struct imx_dsp_ipc *ipc, @@ -63,5 +66,12 @@ static inline int imx_dsp_ring_doorbell(struct imx_dsp_ipc *ipc, return -ENOTSUPP; } +struct mbox_chan *imx_dsp_request_channel(struct imx_dsp_ipc *ipc, int idx) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +void imx_dsp_free_channel(struct imx_dsp_ipc *ipc, int idx) { } + #endif #endif /* _IMX_DSP_IPC_H */ -- cgit v1.2.3 From cfeeea60af2f01c13b94d57a9bb1291e7bc181da Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 16 Nov 2020 12:57:13 +0200 Subject: bus: ti-sysc: Implement GPMC debug quirk to drop platform data We need to enable no-reset-on-init quirk for GPMC if the config option for CONFIG_OMAP_GPMC_DEBUG is set. Otherwise the GPMC driver code is unable to show the bootloader configured timings. Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 240dce553a0b..fafc1beea504 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_QUIRK_GPMC_DEBUG BIT(26) #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) #define SYSC_MODULE_QUIRK_PRUSS BIT(24) #define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) -- cgit v1.2.3 From ead49373d2916080509f51fc6a4ee8f9bc021b9b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 29 Oct 2020 09:57:16 +0800 Subject: mmc: core: Initial support for SD express card/host In the SD specification v7.10 the SD express card has been added. This new type of removable SD card, can be managed via a PCIe/NVMe based interface, while also allowing backwards compatibility towards the legacy SD interface. To keep the backwards compatibility, it's required to start the initialization through the legacy SD interface. If it turns out that the mmc host and the SD card, both supports the PCIe/NVMe interface, then a switch should be allowed. Therefore, let's introduce some basic support for this type of SD cards to the mmc core. The mmc host, should set MMC_CAP2_SD_EXP if it supports this interface and MMC_CAP2_SD_EXP_1_2V, if also 1.2V is supported, as to inform the core about it. To deal with the switch to the PCIe/NVMe interface, the mmc host is required to implement a new host ops, ->init_sd_express(). Based on the initial communication between the host and the card, host->ios.timing is set to either MMC_TIMING_SD_EXP or MMC_TIMING_SD_EXP_1_2V, depending on if 1.2V is supported or not. In this way, the mmc host can check these values in its ->init_sd_express() ops, to know how to proceed with the handover. Note that, to manage card insert/removal, the mmc core sticks with using the ->get_cd() callback, which means it's the host's responsibility to make sure it provides valid data, even if the card may be managed by PCIe/NVMe at the moment. As long as the card seems to be present, the mmc core keeps the card powered on. Cc: Greg Kroah-Hartman Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Rui Feng Signed-off-by: Ulf Hansson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1603936636-3126-1-git-send-email-rui_feng@realsil.com.cn --- include/linux/mmc/host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index c079b932330f..01bba36545c5 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -60,6 +60,8 @@ struct mmc_ios { #define MMC_TIMING_MMC_DDR52 8 #define MMC_TIMING_MMC_HS200 9 #define MMC_TIMING_MMC_HS400 10 +#define MMC_TIMING_SD_EXP 11 +#define MMC_TIMING_SD_EXP_1_2V 12 unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ @@ -173,6 +175,9 @@ struct mmc_host_ops { */ int (*multi_io_quirk)(struct mmc_card *card, unsigned int direction, int blk_size); + + /* Initialize an SD express card, mandatory for MMC_CAP2_SD_EXP. */ + int (*init_sd_express)(struct mmc_host *host, struct mmc_ios *ios); }; struct mmc_cqe_ops { @@ -358,6 +363,8 @@ struct mmc_host { #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ MMC_CAP2_HS200_1_2V_SDR) +#define MMC_CAP2_SD_EXP (1 << 7) /* SD express via PCIe */ +#define MMC_CAP2_SD_EXP_1_2V (1 << 8) /* SD express 1.2V */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ -- cgit v1.2.3 From 5afe802132f242f5520d2acac09ea05d31e3c7cf Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Thu, 29 Oct 2020 09:57:48 +0800 Subject: misc: rtsx: Add SD Express mode support for RTS5261 RTS5261 support SD mode and PCIe/NVMe mode. The workflow is as follows. 1.RTS5261 work in SD mode and set MMC_CAPS2_SD_EXP flag. 2.If card is plugged in, Host send CMD8 to ask card's PCIe availability. 3.If the card has PCIe availability and WP is not set, init_sd_express() will be invoked, RTS5261 switch to PCIe/NVMe mode. 4.Mmc driver handover it to NVMe driver. 5.If card is unplugged, RTS5261 will switch to SD mode. Signed-off-by: Rui Feng Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1603936668-3363-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Ulf Hansson --- include/linux/rtsx_pci.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 745f5e73f99a..b47959f48ccd 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -658,6 +658,19 @@ #define PM_WAKE_EN 0x01 #define PM_CTRL4 0xFF47 +#define RTS5261_FW_CFG0 0xFF54 +#define RTS5261_FW_ENTER_EXPRESS (0x01 << 0) + +#define RTS5261_FW_CFG1 0xFF55 +#define RTS5261_SYS_CLK_SEL_MCU_CLK (0x01 << 7) +#define RTS5261_CRC_CLK_SEL_MCU_CLK (0x01 << 6) +#define RTS5261_FAKE_MCU_CLOCK_GATING (0x01 << 5) +#define RTS5261_MCU_BUS_SEL_MASK (0x01 << 4) +#define RTS5261_MCU_CLOCK_SEL_MASK (0x03 << 2) +#define RTS5261_MCU_CLOCK_SEL_16M (0x01 << 2) +#define RTS5261_MCU_CLOCK_GATING (0x01 << 1) +#define RTS5261_DRIVER_ENABLE_FW (0x01 << 0) + #define REG_CFG_OOBS_OFF_TIMER 0xFEA6 #define REG_CFG_OOBS_ON_TIMER 0xFEA7 #define REG_CFG_VCM_ON_TIMER 0xFEA8 @@ -701,6 +714,13 @@ #define RTS5260_DVCC_TUNE_MASK 0x70 #define RTS5260_DVCC_33 0x70 +/*RTS5261*/ +#define RTS5261_LDO1_CFG0 0xFF72 +#define RTS5261_LDO1_OCP_THD_MASK (0x07 << 5) +#define RTS5261_LDO1_OCP_EN (0x01 << 4) +#define RTS5261_LDO1_OCP_LMT_THD_MASK (0x03 << 2) +#define RTS5261_LDO1_OCP_LMT_EN (0x01 << 1) + #define LDO_VCC_CFG1 0xFF73 #define LDO_VCC_REF_TUNE_MASK 0x30 #define LDO_VCC_REF_1V2 0x20 @@ -741,6 +761,8 @@ #define RTS5260_AUTOLOAD_CFG4 0xFF7F #define RTS5260_MIMO_DISABLE 0x8A +/*RTS5261*/ +#define RTS5261_AUX_CLK_16M_EN (1 << 5) #define RTS5260_REG_GPIO_CTL0 0xFC1A #define RTS5260_REG_GPIO_MASK 0x01 @@ -1191,6 +1213,7 @@ struct rtsx_pcr { #define EXTRA_CAPS_MMC_HS200 (1 << 4) #define EXTRA_CAPS_MMC_8BIT (1 << 5) #define EXTRA_CAPS_NO_MMC (1 << 7) +#define EXTRA_CAPS_SD_EXPRESS (1 << 8) u32 extra_caps; #define IC_VER_A 0 -- cgit v1.2.3 From 6b7b58f425c3359787483479d73c0bb98ffc65b8 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Tue, 3 Nov 2020 17:54:29 +0800 Subject: mmc: rtsx: Add test mode for RTS5261 This patch add test mode for RTS5261. If test mode is set, reader will switch to SD Express mode mandatorily, and this mode is used by factory testing only. Signed-off-by: Rui Feng Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1604397269-2780-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Ulf Hansson --- include/linux/rtsx_pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index b47959f48ccd..db249e8707f3 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -658,6 +658,10 @@ #define PM_WAKE_EN 0x01 #define PM_CTRL4 0xFF47 +/* FW config info register */ +#define RTS5261_FW_CFG_INFO0 0xFF50 +#define RTS5261_FW_EXPRESS_TEST_MASK (0x01 << 0) +#define RTS5261_FW_EA_MODE_MASK (0x01 << 5) #define RTS5261_FW_CFG0 0xFF54 #define RTS5261_FW_ENTER_EXPRESS (0x01 << 0) -- cgit v1.2.3 From 1672617d512880f31d1d43ca0eb0d13d50b8c680 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Tue, 3 Nov 2020 17:55:12 +0800 Subject: misc: rtsx: Add hardware auto power off for RTS5261 This patch enable hardware auto power off when card is removed. Signed-off-by: Rui Feng Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/1604397312-2991-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Ulf Hansson --- include/linux/rtsx_pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index db249e8707f3..fcaadc7c9df1 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -82,6 +82,7 @@ #define MS_OC_INT_EN (1 << 23) #define SD_OC_INT_EN (1 << 22) +#define RTSX_DUM_REG 0x1C /* * macros for easy use @@ -1272,6 +1273,8 @@ struct rtsx_pcr { #define PCI_PID(pcr) ((pcr)->pci->device) #define is_version(pcr, pid, ver) \ (CHK_PCI_PID(pcr, pid) && (pcr)->ic_version == (ver)) +#define is_version_higher_than(pcr, pid, ver) \ + (CHK_PCI_PID(pcr, pid) && (pcr)->ic_version > (ver)) #define pcr_dbg(pcr, fmt, arg...) \ dev_dbg(&(pcr)->pci->dev, fmt, ##arg) -- cgit v1.2.3 From 13daf48978280ea8bce38f1e0598b913b09f5395 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 9 Nov 2020 22:53:16 +0200 Subject: gpiolib: Replace unsigned by unsigned int Replace unsigned by unsigned int in GPIO library code. Note, legacy API left untouched. Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Reviewed-by: Hans de Goede Reviewed-by: Mika Westerberg --- include/linux/gpio/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 901aab89d025..ef49307611d2 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -158,7 +158,7 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, unsigned long *value_bitmap); int gpiod_set_config(struct gpio_desc *desc, unsigned long config); -int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce); +int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce); int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); @@ -481,7 +481,7 @@ static inline int gpiod_set_config(struct gpio_desc *desc, unsigned long config) return -ENOSYS; } -static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) +static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce) { /* GPIO can never have been requested */ WARN_ON(desc); -- cgit v1.2.3 From cc947f2b9c04113d84eeef67cc7c6326e1982019 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2020 10:53:38 +0100 Subject: timers: Make run_local_timers() static No users outside of the timer code. Move the caller below this function to avoid a pointless forward declaration. Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index d10bc7e73b41..fda13c9d1256 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -193,7 +193,6 @@ extern int try_to_del_timer_sync(struct timer_list *timer); #define del_singleshot_timer_sync(t) del_timer_sync(t) extern void init_timers(void); -extern void run_local_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); -- cgit v1.2.3 From 66981c37b3199d293c58f84cf2366e86a06e1a3d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:14 +0100 Subject: hrtimer: Fix kernel-doc markups The hrtimer_get_remaining() markup is documenting, instead, __hrtimer_get_remaining(), as it is placed at the C file. In order to properly document it, a kernel-doc markup is needed together with the function prototype. So, add a new one, while preserving the existing one, just fixing the function name. The hrtimer_is_queued prototype has a typo: it is using '=' instead of '-' to split: identifier - description as required by kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/9dc87808c2fd07b7e050bafcd033c5ef05808fea.1605521731.git.mchehab+huawei@kernel.org --- include/linux/hrtimer.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 107cedd7019a..bb5e7b0a4274 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -447,6 +447,10 @@ static inline void hrtimer_restart(struct hrtimer *timer) /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); +/** + * hrtimer_get_remaining - get remaining time for the timer + * @timer: the timer to read + */ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) { return __hrtimer_get_remaining(timer, false); @@ -458,7 +462,7 @@ extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); /** - * hrtimer_is_queued = check, whether the timer is on one of the queues + * hrtimer_is_queued - check, whether the timer is on one of the queues * @timer: Timer to check * * Returns: True if the timer is queued, false otherwise -- cgit v1.2.3 From e00adcadf3af7a8335026d71ab9f0e0a922191ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:11 +0100 Subject: block: add a new set_read_only method Add a new method to allow for driver-specific processing when setting or clearing the block device read-only state. This allows to replace the cumbersome and error-prone override of the whole ioctl implementation. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..5c1ba8a8d2bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1850,6 +1850,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*set_read_only)(struct block_device *bdev, bool ro); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, -- cgit v1.2.3 From 98f49b63e84d4ee1a5c327d0b5f4e8699f6c70fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:17 +0100 Subject: block: remove set_device_ro Fold set_device_ro into its only remaining caller. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 03da3f603d30..52eb592c874a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -304,7 +304,6 @@ extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) -- cgit v1.2.3 From a7cb3d2f09c8405aed59d97a7d02cebea43cd3c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Nov 2020 11:00:18 +0100 Subject: block: remove __blkdev_driver_ioctl Just open code it in the few callers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c1ba8a8d2bc..05b346a68c2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1867,8 +1867,6 @@ extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, #define blkdev_compat_ptr_ioctl NULL #endif -extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, - unsigned long); extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -- cgit v1.2.3 From a160c6159d4a0cf82f28bc1658a958e278ec3688 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:28 +0100 Subject: block: add an optional probe callback to major_names Add a callback to the major_names array that allows a driver to override how to probe for dev_t that doesn't currently have a gendisk registered. This will help separating the lookup of the gendisk by dev_t vs probe action for a not currently registered dev_t. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 52eb592c874a..811d0f83c4cf 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -367,7 +367,10 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -int register_blkdev(unsigned int major, const char *name); +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); void revalidate_disk_size(struct gendisk *disk, bool verbose); -- cgit v1.2.3 From d18e8b1bf9e2ee814a7f886a156bf762d52e178b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:29 +0100 Subject: ide: remove ide_{,un}register_region There is no need to ever register the fake gendisk used for ide-tape. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/ide.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 62653769509f..2c300689a51a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1493,9 +1493,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_register_region(struct gendisk *); -void ide_unregister_region(struct gendisk *); - void ide_check_nien_quirk_list(ide_drive_t *); void ide_undecoded_slave(ide_drive_t *); -- cgit v1.2.3 From e418de3abcda8b102f737919e830024d1455938f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Oct 2020 15:58:41 +0100 Subject: block: switch gendisk lookup to a simple xarray Now that bdev_map is only used for finding gendisks, we can use a simple xarray instead of the regions tracking structure for it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- include/linux/genhd.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 811d0f83c4cf..3cbc2781ef34 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -339,15 +339,8 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -extern struct kobject *get_disk_and_module(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void put_disk_and_module(struct gendisk *disk); -extern void blk_register_region(dev_t devt, unsigned long range, - struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), - void *data); -extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk_node(minors, node_id) \ ({ \ -- cgit v1.2.3 From 7a089ec7d77fe7d50f6bb7b178fa25eec9fd822b Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 12 Nov 2020 07:04:03 -0500 Subject: console: Delete unused con_font_copy() callback implementations Recently in commit 3c4e0dff2095 ("vt: Disable KD_FONT_OP_COPY") we disabled the KD_FONT_OP_COPY ioctl() option. Delete all the con_font_copy() callbacks, since we no longer use them. Mark KD_FONT_OP_COPY as "obsolete" in include/uapi/linux/kd.h, just like what we have done for PPPIOCDETACH in commit af8d3c7c001a ("ppp: remove the PPPIOCDETACH ioctl"). Signed-off-by: Peilin Ye Reviewed-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/c8d28007edf50de4387e1532eb3eb736db716f73.1605169912.git.yepeilin.cs@gmail.com --- include/linux/console.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 4b1e26c4cb42..20874db50bc8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -62,7 +62,6 @@ struct consw { int (*con_font_get)(struct vc_data *vc, struct console_font *font); int (*con_font_default)(struct vc_data *vc, struct console_font *font, char *name); - int (*con_font_copy)(struct vc_data *vc, int con); int (*con_resize)(struct vc_data *vc, unsigned int width, unsigned int height, unsigned int user); void (*con_set_palette)(struct vc_data *vc, -- cgit v1.2.3 From 4ee573086bd88ff3060dda07873bf755d332e9ba Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 12 Nov 2020 07:13:34 -0500 Subject: Fonts: Add charcount field to font_desc Subsystems are hard-coding the number of characters of our built-in fonts as 256. Include that information in our kernel font descriptor, `struct font_desc`. Signed-off-by: Peilin Ye Reviewed-by: Daniel Vetter Reviewed-by: Greg Kroah-Hartman Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/65952296d1d9486093bd955d1536f7dcd11112c6.1605169912.git.yepeilin.cs@gmail.com --- include/linux/font.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/font.h b/include/linux/font.h index 4f50d736ea72..abf1442ce719 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -17,6 +17,7 @@ struct font_desc { int idx; const char *name; unsigned int width, height; + unsigned int charcount; const void *data; int pref; }; -- cgit v1.2.3 From 449f4ec9892ebc2f37a7eae6d97db2cf7c65e09a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Nov 2020 15:56:56 +0100 Subject: block: remove the update_bdev parameter to set_capacity_revalidate_and_notify The update_bdev argument is always set to true, so remove it. Also rename the function to the slighly less verbose set_capacity_and_notify, as propagating the disk size to the block device isn't really revalidation. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3cbc2781ef34..46553d6d6025 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -314,8 +314,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev); +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -- cgit v1.2.3 From cef397038167ac15d085914493d6c86385773709 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Nov 2020 17:52:58 +0100 Subject: arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed Stefan Agner reported a bug when using zsram on 32-bit Arm machines with RAM above the 4GB address boundary: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = a27bd01c [00000000] *pgd=236a0003, *pmd=1ffa64003 Internal error: Oops: 207 [#1] SMP ARM Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1 Hardware name: BCM2711 PC is at zs_map_object+0x94/0x338 LR is at zram_bvec_rw.constprop.0+0x330/0xa64 pc : [] lr : [] psr: 60000013 sp : e376bbe0 ip : 00000000 fp : c1e2921c r10: 00000002 r9 : c1dda730 r8 : 00000000 r7 : e8ff7a00 r6 : 00000000 r5 : 02f9ffa0 r4 : e3710000 r3 : 000fdffe r2 : c1e0ce80 r1 : ebf979a0 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 30c5383d Table: 235c2a80 DAC: fffffffd Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6) Stack: (0xe376bbe0 to 0xe376c000) As it turns out, zsram needs to know the maximum memory size, which is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture. The same problem will be hit on all 32-bit architectures that have a physical address space larger than 4GB and happen to not enable sparsemem and include asm/sparsemem.h from asm/pgtable.h. After the initial discussion, I suggested just always defining MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is set, or provoking a build error otherwise. This addresses all configurations that can currently have this runtime bug, but leaves all other configurations unchanged. I looked up the possible number of bits in source code and datasheets, here is what I found: - on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used - on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never support more than 32 bits, even though supersections in theory allow up to 40 bits as well. - on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5 XPA supports up to 60 bits in theory, but 40 bits are more than anyone will ever ship - On PowerPC, there are three different implementations of 36 bit addressing, but 32-bit is used without CONFIG_PTE_64BIT - On RISC-V, the normal page table format can support 34 bit addressing. There is no highmem support on RISC-V, so anything above 2GB is unused, but it might be useful to eventually support CONFIG_ZRAM for high pages. Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library") Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS") Acked-by: Thomas Bogendoerfer Reviewed-by: Stefan Agner Tested-by: Stefan Agner Acked-by: Mike Rapoport Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/ Signed-off-by: Arnd Bergmann --- include/linux/pgtable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 71125a4676c4..e237004d498d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ +#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) +#ifdef CONFIG_PHYS_ADDR_T_64BIT +/* + * ZSMALLOC needs to know the highest PFN on 32-bit architectures + * with physical address space extension, but falls back to + * BITS_PER_LONG otherwise. + */ +#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition +#else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#endif +#endif + #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit v1.2.3 From 557acb3d2cd9c82de19f944f6cc967a347735385 Mon Sep 17 00:00:00 2001 From: Amjad Ouled-Ameur Date: Fri, 13 Nov 2020 00:00:43 +0100 Subject: reset: make shared pulsed reset controls re-triggerable The current reset framework API does not allow to release what is done by reset_control_reset(), IOW decrement triggered_count. Add the new reset_control_rearm() call to do so. When reset_control_reset() has been called once, the counter triggered_count, in the reset framework, is incremented i.e the resource under the reset is in-use and the reset should not be done again. reset_control_rearm() would be the way to state that the resource is no longer used and, that from the caller's perspective, the reset can be fired again if necessary. Signed-off-by: Amjad Ouled-Ameur Reported-by: Jerome Brunet Signed-off-by: Philipp Zabel --- include/linux/reset.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 05aa9f440f48..439fec7112a9 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -13,6 +13,7 @@ struct reset_control; #ifdef CONFIG_RESET_CONTROLLER int reset_control_reset(struct reset_control *rstc); +int reset_control_rearm(struct reset_control *rstc); int reset_control_assert(struct reset_control *rstc); int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); -- cgit v1.2.3 From 872f690341948b502c93318f806d821c56772c42 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Sun, 15 Nov 2020 18:08:06 +0100 Subject: treewide: rename nla_strlcpy to nla_strscpy. Calls to nla_strlcpy are now replaced by calls to nla_strscpy which is the new name of this function. Signed-off-by: Francis Laniel Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski --- include/linux/genl_magic_struct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index eeae59d3ceb7..35d21fddaf2d 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -89,7 +89,7 @@ static inline int nla_put_u64_0pad(struct sk_buff *skb, int attrtype, u64 value) nla_get_u64, nla_put_u64_0pad, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ - nla_strlcpy, nla_put, false) + nla_strscpy, nla_put, false) #define __bin_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ nla_memcpy, nla_put, false) -- cgit v1.2.3 From dd8088d5a8969dc2b42f71d7bc01c25c61a78066 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 10 Nov 2020 17:29:32 +0800 Subject: PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter In many case, we need to check return value of pm_runtime_get_sync, but it brings a trouble to the usage counter processing. Many callers forget to decrease the usage counter when it failed, which could resulted in reference leak. It has been discussed a lot[0][1]. So we add a function to deal with the usage counter for better coding. [0]https://lkml.org/lkml/2020/6/14/88 [1]https://patchwork.ozlabs.org/project/linux-tegra/list/?series=178139 Signed-off-by: Zhang Qilong Acked-by: Rafael J. Wysocki Signed-off-by: Jakub Kicinski --- include/linux/pm_runtime.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4b708f4e8eed..b492ae00cc90 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -386,6 +386,27 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +/** + * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. + * @dev: Target device. + * + * Resume @dev synchronously and if that is successful, increment its runtime + * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been + * incremented or a negative error code otherwise. + */ +static inline int pm_runtime_resume_and_get(struct device *dev) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. -- cgit v1.2.3 From 3136b93c3fb2b7c19e853e049203ff8f2b9dd2cd Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:41:58 -0500 Subject: entry: Expose helpers to migrate TIF to SYSCALL_WORK flags With the goal to split the syscall work related flags into a separate field that is architecture independent, expose transitional helpers that resolve to either the TIF flags or to the corresponding SYSCALL_WORK flags. This will allow architectures to migrate only when they port to the generic syscall entry code. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-3-krisman@collabora.com --- include/linux/thread_info.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e93e249a4e9b..0e9fb15d6b42 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -97,6 +97,38 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) +#ifdef CONFIG_GENERIC_ENTRY +#define set_syscall_work(fl) \ + set_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) +#define test_syscall_work(fl) \ + test_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) +#define clear_syscall_work(fl) \ + clear_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) + +#define set_task_syscall_work(t, fl) \ + set_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) +#define test_task_syscall_work(t, fl) \ + test_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) +#define clear_task_syscall_work(t, fl) \ + clear_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) + +#else /* CONFIG_GENERIC_ENTRY */ + +#define set_syscall_work(fl) \ + set_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) +#define test_syscall_work(fl) \ + test_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) +#define clear_syscall_work(fl) \ + clear_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + +#define set_task_syscall_work(t, fl) \ + set_ti_thread_flag(task_thread_info(t), TIF_##fl) +#define test_task_syscall_work(t, fl) \ + test_ti_thread_flag(task_thread_info(t), TIF_##fl) +#define clear_task_syscall_work(t, fl) \ + clear_ti_thread_flag(task_thread_info(t), TIF_##fl) +#endif /* !CONFIG_GENERIC_ENTRY */ + #define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES -- cgit v1.2.3 From b86678cf0f1d76062aa964c5f0c6c89fe5a6dcfd Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:41:59 -0500 Subject: entry: Wire up syscall_work in common entry code Prepare the common entry code to use the SYSCALL_WORK flags. They will be defined in subsequent patches for each type of syscall work. SYSCALL_WORK_ENTRY/EXIT are defined for the transition, as they will replace the TIF_ equivalent defines. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-4-krisman@collabora.com --- include/linux/entry-common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index aab549026ab8..3fe8f868f15e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -64,6 +64,9 @@ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) +#define SYSCALL_WORK_ENTER (0) +#define SYSCALL_WORK_EXIT (0) + /* * TIF flags handled in exit_to_user_mode_loop() */ -- cgit v1.2.3 From 23d67a54857a768acdb0804cdd6037c324a50ecd Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:00 -0500 Subject: seccomp: Migrate to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SECCOMP, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-5-krisman@collabora.com --- include/linux/entry-common.h | 8 ++------ include/linux/seccomp.h | 2 +- include/linux/thread_info.h | 6 ++++++ 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 3fe8f868f15e..fa3cdb102dbf 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -21,10 +21,6 @@ # define _TIF_SYSCALL_TRACEPOINT (0) #endif -#ifndef _TIF_SECCOMP -# define _TIF_SECCOMP (0) -#endif - #ifndef _TIF_SYSCALL_AUDIT # define _TIF_SYSCALL_AUDIT (0) #endif @@ -49,7 +45,7 @@ #endif #define SYSCALL_ENTER_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) @@ -64,7 +60,7 @@ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) -#define SYSCALL_WORK_ENTER (0) +#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP) #define SYSCALL_WORK_EXIT (0) /* diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 02aef2844c38..47763f3999f7 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -42,7 +42,7 @@ struct seccomp { extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { - if (unlikely(test_thread_flag(TIF_SECCOMP))) + if (unlikely(test_syscall_work(SECCOMP))) return __secure_computing(NULL); return 0; } diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 0e9fb15d6b42..a308ba4ef07b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -35,6 +35,12 @@ enum { GOOD_STACK, }; +enum syscall_work_bit { + SYSCALL_WORK_BIT_SECCOMP, +}; + +#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) + #include #ifdef __KERNEL__ -- cgit v1.2.3 From 524666cb5de7c38a1925e7401a6e59d68682dd8c Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:01 -0500 Subject: tracepoints: Migrate to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_TRACEPOINT, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-6-krisman@collabora.com --- include/linux/entry-common.h | 13 +++++-------- include/linux/thread_info.h | 2 ++ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fa3cdb102dbf..2a01eee2dbba 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -17,10 +17,6 @@ # define _TIF_SYSCALL_EMU (0) #endif -#ifndef _TIF_SYSCALL_TRACEPOINT -# define _TIF_SYSCALL_TRACEPOINT (0) -#endif - #ifndef _TIF_SYSCALL_AUDIT # define _TIF_SYSCALL_AUDIT (0) #endif @@ -46,7 +42,7 @@ #define SYSCALL_ENTER_WORK \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \ + _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) /* @@ -58,10 +54,11 @@ #define SYSCALL_EXIT_WORK \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) + ARCH_SYSCALL_EXIT_WORK) -#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP) -#define SYSCALL_WORK_EXIT (0) +#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ + SYSCALL_WORK_SYSCALL_TRACEPOINT) +#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT) /* * TIF flags handled in exit_to_user_mode_loop() diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index a308ba4ef07b..c232043c12d3 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -37,9 +37,11 @@ enum { enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, + SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) +#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) #include -- cgit v1.2.3 From 64c19ba29b66e98af9306b4a7525fb22c895d252 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:02 -0500 Subject: ptrace: Migrate to use SYSCALL_TRACE flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_TRACE, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-7-krisman@collabora.com --- include/linux/entry-common.h | 10 ++++++---- include/linux/thread_info.h | 2 ++ include/linux/tracehook.h | 17 +++++++++-------- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 2a01eee2dbba..ae426ab9c372 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -41,7 +41,7 @@ #endif #define SYSCALL_ENTER_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + (_TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) @@ -53,12 +53,14 @@ #endif #define SYSCALL_EXIT_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + (_TIF_SYSCALL_AUDIT | \ ARCH_SYSCALL_EXIT_WORK) #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ - SYSCALL_WORK_SYSCALL_TRACEPOINT) -#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT) + SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE) +#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE) /* * TIF flags handled in exit_to_user_mode_loop() diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index c232043c12d3..761a4590d554 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -38,10 +38,12 @@ enum { enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, + SYSCALL_WORK_BIT_SYSCALL_TRACE, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) #define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) +#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #include diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index f7d82e4fafd6..3f20368afe9e 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -83,11 +83,12 @@ static inline int ptrace_report_syscall(struct pt_regs *regs, * tracehook_report_syscall_entry - task is about to attempt a system call * @regs: user register state of current task * - * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set, - * when the current task has just entered the kernel for a system call. - * Full user register state is available here. Changing the values - * in @regs can affect the system call number and arguments to be tried. - * It is safe to block here, preventing the system call from beginning. + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or + * %TIF_SYSCALL_EMU have been set, when the current task has just + * entered the kernel for a system call. Full user register state is + * available here. Changing the values in @regs can affect the system + * call number and arguments to be tried. It is safe to block here, + * preventing the system call from beginning. * * Returns zero normally, or nonzero if the calling arch code should abort * the system call. That must prevent normal entry so no system call is @@ -109,15 +110,15 @@ static inline __must_check int tracehook_report_syscall_entry( * @regs: user register state of current task * @step: nonzero if simulating single-step or block-step * - * This will be called if %TIF_SYSCALL_TRACE has been set, when the - * current task has just finished an attempted system call. Full + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when + * the current task has just finished an attempted system call. Full * user register state is available here. It is safe to block here, * preventing signals from being processed. * * If @step is nonzero, this report is also in lieu of the normal * trap that would follow the system call instruction because * user_enable_block_step() or user_enable_single_step() was used. - * In this case, %TIF_SYSCALL_TRACE might not be set. + * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. * * Called without locks, just before checking for pending signals. */ -- cgit v1.2.3 From 64eb35f701f04b30706e21d1b02636b5d31a37d2 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:03 -0500 Subject: ptrace: Migrate TIF_SYSCALL_EMU to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_EMU, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-8-krisman@collabora.com --- include/linux/entry-common.h | 8 ++------ include/linux/thread_info.h | 2 ++ include/linux/tracehook.h | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index ae426ab9c372..b30f82bed92b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -13,10 +13,6 @@ * Define dummy _TIF work flags if not defined by the architecture or for * disabled functionality. */ -#ifndef _TIF_SYSCALL_EMU -# define _TIF_SYSCALL_EMU (0) -#endif - #ifndef _TIF_SYSCALL_AUDIT # define _TIF_SYSCALL_AUDIT (0) #endif @@ -42,7 +38,6 @@ #define SYSCALL_ENTER_WORK \ (_TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_EMU | \ ARCH_SYSCALL_ENTER_WORK) /* @@ -58,7 +53,8 @@ #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ SYSCALL_WORK_SYSCALL_TRACEPOINT | \ - SYSCALL_WORK_SYSCALL_TRACE) + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_EMU) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 761a4590d554..85b8a4216168 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -39,11 +39,13 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, SYSCALL_WORK_BIT_SYSCALL_TRACE, + SYSCALL_WORK_BIT_SYSCALL_EMU, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) #define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) +#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #include diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3f20368afe9e..54b925224a13 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -84,7 +84,7 @@ static inline int ptrace_report_syscall(struct pt_regs *regs, * @regs: user register state of current task * * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or - * %TIF_SYSCALL_EMU have been set, when the current task has just + * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just * entered the kernel for a system call. Full user register state is * available here. Changing the values in @regs can affect the system * call number and arguments to be tried. It is safe to block here, -- cgit v1.2.3 From 785dc4eb7fd74e3b7f4eac468457b633117e1aea Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:04 -0500 Subject: audit: Migrate to use SYSCALL_WORK flag On architectures using the generic syscall entry code the architecture independent syscall work is moved to flags in thread_info::syscall_work. This removes architecture dependencies and frees up TIF bits. Define SYSCALL_WORK_SYSCALL_AUDIT, use it in the generic entry code and convert the code which uses the TIF specific helper functions to use the new *_syscall_work() helpers which either resolve to the new mode for users of the generic entry code or to the TIF based functions for the other architectures. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-9-krisman@collabora.com --- include/linux/entry-common.h | 18 ++++++------------ include/linux/thread_info.h | 2 ++ 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b30f82bed92b..d7b96f42817f 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -13,10 +13,6 @@ * Define dummy _TIF work flags if not defined by the architecture or for * disabled functionality. */ -#ifndef _TIF_SYSCALL_AUDIT -# define _TIF_SYSCALL_AUDIT (0) -#endif - #ifndef _TIF_PATCH_PENDING # define _TIF_PATCH_PENDING (0) #endif @@ -36,9 +32,7 @@ # define ARCH_SYSCALL_ENTER_WORK (0) #endif -#define SYSCALL_ENTER_WORK \ - (_TIF_SYSCALL_AUDIT | \ - ARCH_SYSCALL_ENTER_WORK) +#define SYSCALL_ENTER_WORK ARCH_SYSCALL_ENTER_WORK /* * TIF flags handled in syscall_exit_to_user_mode() @@ -47,16 +41,16 @@ # define ARCH_SYSCALL_EXIT_WORK (0) #endif -#define SYSCALL_EXIT_WORK \ - (_TIF_SYSCALL_AUDIT | \ - ARCH_SYSCALL_EXIT_WORK) +#define SYSCALL_EXIT_WORK ARCH_SYSCALL_EXIT_WORK #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ - SYSCALL_WORK_SYSCALL_EMU) + SYSCALL_WORK_SYSCALL_EMU | \ + SYSCALL_WORK_SYSCALL_AUDIT) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ - SYSCALL_WORK_SYSCALL_TRACE) + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_AUDIT) /* * TIF flags handled in exit_to_user_mode_loop() diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 85b8a4216168..317363212ae9 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -40,12 +40,14 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, SYSCALL_WORK_BIT_SYSCALL_TRACE, SYSCALL_WORK_BIT_SYSCALL_EMU, + SYSCALL_WORK_BIT_SYSCALL_AUDIT, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) #define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) +#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) #include -- cgit v1.2.3 From 2991552447707d791d9d81a5dc161f9e9e90b163 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Nov 2020 12:42:05 -0500 Subject: entry: Drop usage of TIF flags in the generic syscall code Now that the flags migration in the common syscall entry code is complete and the code relies exclusively on thread_info::syscall_work, clean up the accesses to TI flags in that path. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20201116174206.2639648-10-krisman@collabora.com --- include/linux/entry-common.h | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index d7b96f42817f..49b26b216e4e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -26,31 +26,29 @@ #endif /* - * TIF flags handled in syscall_enter_from_user_mode() + * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() */ -#ifndef ARCH_SYSCALL_ENTER_WORK -# define ARCH_SYSCALL_ENTER_WORK (0) +#ifndef ARCH_SYSCALL_WORK_ENTER +# define ARCH_SYSCALL_WORK_ENTER (0) #endif -#define SYSCALL_ENTER_WORK ARCH_SYSCALL_ENTER_WORK - /* - * TIF flags handled in syscall_exit_to_user_mode() + * SYSCALL_WORK flags handled in syscall_exit_to_user_mode() */ -#ifndef ARCH_SYSCALL_EXIT_WORK -# define ARCH_SYSCALL_EXIT_WORK (0) +#ifndef ARCH_SYSCALL_WORK_EXIT +# define ARCH_SYSCALL_WORK_EXIT (0) #endif -#define SYSCALL_EXIT_WORK ARCH_SYSCALL_EXIT_WORK - #define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_EMU | \ - SYSCALL_WORK_SYSCALL_AUDIT) + SYSCALL_WORK_SYSCALL_AUDIT | \ + ARCH_SYSCALL_WORK_ENTER) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ - SYSCALL_WORK_SYSCALL_AUDIT) + SYSCALL_WORK_SYSCALL_AUDIT | \ + ARCH_SYSCALL_WORK_EXIT) /* * TIF flags handled in exit_to_user_mode_loop() @@ -136,8 +134,8 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * * It handles the following work items: * - * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), - * __secure_computing(), trace_sys_enter() + * 1) syscall_work flag dependent invocations of + * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); -- cgit v1.2.3 From e8b7db38449ac5b950a3f00519171c4be3e226ff Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 9 Nov 2020 11:04:00 +0100 Subject: Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus hardening Currently, VMbus drivers use pointers into guest memory as request IDs for interactions with Hyper-V. To be more robust in the face of errors or malicious behavior from a compromised Hyper-V, avoid exposing guest memory addresses to Hyper-V. Also avoid Hyper-V giving back a bad request ID that is then treated as the address of a guest data structure with no validation. Instead, encapsulate these memory addresses and provide small integers as request IDs. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Reviewed-by: Wei Liu Link: https://lore.kernel.org/r/20201109100402.8946-2-parri.andrea@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 1ce131f29f3b..5b6d5c4e3711 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -764,6 +764,22 @@ enum vmbus_device_type { HV_UNKNOWN, }; +/* + * Provides request ids for VMBus. Encapsulates guest memory + * addresses and stores the next available slot in req_arr + * to generate new ids in constant time. + */ +struct vmbus_requestor { + u64 *req_arr; + unsigned long *req_bitmap; /* is a given slot available? */ + u32 size; + u64 next_request_id; + spinlock_t req_lock; /* provides atomicity */ +}; + +#define VMBUS_NO_RQSTOR U64_MAX +#define VMBUS_RQST_ERROR (U64_MAX - 1) + struct vmbus_device { u16 dev_type; guid_t guid; @@ -988,8 +1004,14 @@ struct vmbus_channel { u32 fuzz_testing_interrupt_delay; u32 fuzz_testing_message_delay; + /* request/transaction ids for VMBus */ + struct vmbus_requestor requestor; + u32 rqstor_size; }; +u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr); +u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id); + static inline bool is_hvsock_channel(const struct vmbus_channel *c) { return !!(c->offermsg.offer.chn_flags & -- cgit v1.2.3 From 4d18fcc95f50950a99bd940d4e61a983f91d267a Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 9 Nov 2020 11:04:02 +0100 Subject: hv_netvsc: Use vmbus_requestor to generate transaction IDs for VMBus hardening Currently, pointers to guest memory are passed to Hyper-V as transaction IDs in netvsc. In the face of errors or malicious behavior in Hyper-V, netvsc should not expose or trust the transaction IDs returned by Hyper-V to be valid guest memory addresses. Instead, use small integers generated by vmbus_requestor as requests (transaction) IDs. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Acked-by: Jakub Kicinski Reviewed-by: Wei Liu Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20201109100402.8946-4-parri.andrea@gmail.com Signed-off-by: Wei Liu --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5b6d5c4e3711..5ddb479c4d4c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -779,6 +779,7 @@ struct vmbus_requestor { #define VMBUS_NO_RQSTOR U64_MAX #define VMBUS_RQST_ERROR (U64_MAX - 1) +#define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2) struct vmbus_device { u16 dev_type; -- cgit v1.2.3 From f97bb5272d9e95d400d6c8643ebb146b3e3e7842 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 09:08:41 +0100 Subject: sched: Fix data-race in wakeup Mel reported that on some ARM64 platforms loadavg goes bananas and Will tracked it down to the following race: CPU0 CPU1 schedule() prev->sched_contributes_to_load = X; deactivate_task(prev); try_to_wake_up() if (p->on_rq &&) // false if (smp_load_acquire(&p->on_cpu) && // true ttwu_queue_wakelist()) p->sched_remote_wakeup = Y; smp_store_release(prev->on_cpu, 0); where both p->sched_contributes_to_load and p->sched_remote_wakeup are in the same word, and thus the stores X and Y race (and can clobber one another's data). Whereas prior to commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the p->on_cpu handoff serialized access to p->sched_remote_wakeup (just as it still does with p->sched_contributes_to_load) that commit broke that by calling ttwu_queue_wakelist() with p->on_cpu != 0. However, due to p->XXX = X ttwu() schedule() if (p->on_rq && ...) // false smp_mb__after_spinlock() if (smp_load_acquire(&p->on_cpu) && deactivate_task() ttwu_queue_wakelist()) p->on_rq = 0; p->sched_remote_wakeup = Y; We can be sure any 'current' store is complete and 'current' is guaranteed asleep. Therefore we can move p->sched_remote_wakeup into the current flags word. Note: while the observed failure was loadavg accounting gone wrong due to ttwu() cobbering p->sched_contributes_to_load, the reverse problem is also possible where schedule() clobbers p->sched_remote_wakeup, this could result in enqueue_entity() wrecking ->vruntime and causing scheduling artifacts. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Mel Gorman Debugged-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201117083016.GK3121392@hirez.programming.kicks-ass.net --- include/linux/sched.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d383cf09e78f..0e91b451d2a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -769,7 +769,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; #ifdef CONFIG_PSI unsigned sched_psi_wake_requeue:1; #endif @@ -779,6 +778,21 @@ struct task_struct { /* Unserialized, strictly 'current' */ + /* + * This field must not be in the scheduler word above due to wakelist + * queueing no longer being serialized by p->on_cpu. However: + * + * p->XXX = X; ttwu() + * schedule() if (p->on_rq && ..) // false + * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true + * deactivate_task() ttwu_queue_wakelist()) + * p->on_rq = 0; p->sched_remote_wakeup = Y; + * + * guarantees all stores of 'current' are visible before + * ->sched_remote_wakeup gets used, so it can be in this word. + */ + unsigned sched_remote_wakeup:1; + /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; -- cgit v1.2.3 From 2279f540ea7d05f22d2f0c4224319330228586bc Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 17 Nov 2020 07:14:32 +0100 Subject: sched/deadline: Fix priority inheritance with multiple scheduling classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glenn reported that "an application [he developed produces] a BUG in deadline.c when a SCHED_DEADLINE task contends with CFS tasks on nested PTHREAD_PRIO_INHERIT mutexes. I believe the bug is triggered when a CFS task that was boosted by a SCHED_DEADLINE task boosts another CFS task (nested priority inheritance). ------------[ cut here ]------------ kernel BUG at kernel/sched/deadline.c:1462! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 12 PID: 19171 Comm: dl_boost_bug Tainted: ... Hardware name: ... RIP: 0010:enqueue_task_dl+0x335/0x910 Code: ... RSP: 0018:ffffc9000c2bbc68 EFLAGS: 00010002 RAX: 0000000000000009 RBX: ffff888c0af94c00 RCX: ffffffff81e12500 RDX: 000000000000002e RSI: ffff888c0af94c00 RDI: ffff888c10b22600 RBP: ffffc9000c2bbd08 R08: 0000000000000009 R09: 0000000000000078 R10: ffffffff81e12440 R11: ffffffff81e1236c R12: ffff888bc8932600 R13: ffff888c0af94eb8 R14: ffff888c10b22600 R15: ffff888bc8932600 FS: 00007fa58ac55700(0000) GS:ffff888c10b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa58b523230 CR3: 0000000bf44ab003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? intel_pstate_update_util_hwp+0x13/0x170 rt_mutex_setprio+0x1cc/0x4b0 task_blocks_on_rt_mutex+0x225/0x260 rt_spin_lock_slowlock_locked+0xab/0x2d0 rt_spin_lock_slowlock+0x50/0x80 hrtimer_grab_expiry_lock+0x20/0x30 hrtimer_cancel+0x13/0x30 do_nanosleep+0xa0/0x150 hrtimer_nanosleep+0xe1/0x230 ? __hrtimer_init_sleeper+0x60/0x60 __x64_sys_nanosleep+0x8d/0xa0 do_syscall_64+0x4a/0x100 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fa58b52330d ... ---[ end trace 0000000000000002 ]— He also provided a simple reproducer creating the situation below: So the execution order of locking steps are the following (N1 and N2 are non-deadline tasks. D1 is a deadline task. M1 and M2 are mutexes that are enabled * with priority inheritance.) Time moves forward as this timeline goes down: N1 N2 D1 | | | | | | Lock(M1) | | | | | | Lock(M2) | | | | | | Lock(M2) | | | | Lock(M1) | | (!!bug triggered!) | Daniel reported a similar situation as well, by just letting ksoftirqd run with DEADLINE (and eventually block on a mutex). Problem is that boosted entities (Priority Inheritance) use static DEADLINE parameters of the top priority waiter. However, there might be cases where top waiter could be a non-DEADLINE entity that is currently boosted by a DEADLINE entity from a different lock chain (i.e., nested priority chains involving entities of non-DEADLINE classes). In this case, top waiter static DEADLINE parameters could be null (initialized to 0 at fork()) and replenish_dl_entity() would hit a BUG(). Fix this by keeping track of the original donor and using its parameters when a task is boosted. Reported-by: Glenn Elliott Reported-by: Daniel Bristot de Oliveira Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Tested-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201117061432.517340-1-juri.lelli@redhat.com --- include/linux/sched.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0e91b451d2a2..095fdec07b38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -551,7 +551,6 @@ struct sched_dl_entity { * overruns. */ unsigned int dl_throttled : 1; - unsigned int dl_boosted : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; @@ -570,6 +569,15 @@ struct sched_dl_entity { * time. */ struct hrtimer inactive_timer; + +#ifdef CONFIG_RT_MUTEXES + /* + * Priority Inheritance. When a DEADLINE scheduling entity is boosted + * pi_se points to the donor, otherwise points to the dl_se it belongs + * to (the original one/itself). + */ + struct sched_dl_entity *pi_se; +#endif }; #ifdef CONFIG_UCLAMP_TASK -- cgit v1.2.3 From 95bb7c42ac8a94ce3d0eb059ad64430390351ccb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 13 Nov 2020 00:01:21 +0200 Subject: mm: Add 'mprotect' hook to struct vm_operations_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Background ========== 1. SGX enclave pages are populated with data by copying from normal memory via ioctl() (SGX_IOC_ENCLAVE_ADD_PAGES), which will be added later in this series. 2. It is desirable to be able to restrict those normal memory data sources. For instance, to ensure that the source data is executable before copying data to an executable enclave page. 3. Enclave page permissions are dynamic (just like normal permissions) and can be adjusted at runtime with mprotect(). This creates a problem because the original data source may have long since vanished at the time when enclave page permissions are established (mmap() or mprotect()). The solution (elsewhere in this series) is to force enclave creators to declare their paging permission *intent* up front to the ioctl(). This intent can be immediately compared to the source data’s mapping and rejected if necessary. The “intent” is also stashed off for later comparison with enclave PTEs. This ensures that any future mmap()/mprotect() operations performed by the enclave creator or done on behalf of the enclave can be compared with the earlier declared permissions. Problem ======= There is an existing mmap() hook which allows SGX to perform this permission comparison at mmap() time. However, there is no corresponding ->mprotect() hook. Solution ======== Add a vm_ops->mprotect() hook so that mprotect() operations which are inconsistent with any page's stashed intent can be rejected by the driver. Signed-off-by: Sean Christopherson Co-developed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Borislav Petkov Acked-by: Jethro Beekman Acked-by: Dave Hansen Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: linux-mm@kvack.org Link: https://lkml.kernel.org/r/20201112220135.165028-11-jarkko@kernel.org --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..1813fa86b981 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -559,6 +559,13 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); + /* + * Called by mprotect() to make driver-specific permission + * checks before mprotect() is finalised. The VMA must not + * be modified. Returns 0 if eprotect() can proceed. + */ + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); -- cgit v1.2.3 From 5bdba520c1b318578caffd325515b35d187f8a0e Mon Sep 17 00:00:00 2001 From: Faiyaz Mohammed Date: Mon, 16 Nov 2020 17:05:37 +0530 Subject: mm: memblock: drop __init from memblock functions to make it inline __init is used with inline due to which memblock wraper functions are not getting inline. for example: [ 0.000000] memblock_alloc_try_nid: 1490 bytes align=0x40 nid=-1 from=0x0000000000000000 max_addr=0x0000000000000000 memblock_alloc+0x20/0x2c [ 0.000000] memblock_reserve: [0x000000023f09a3c0-0x000000023f09a991] memblock_alloc_range_nid+0xc0/0x188 Dropping __init from memblock wrapper functions to make it inline and it increase the debugability. After: [ 0.000000] memblock_alloc_try_nid: 1490 bytes align=0x40 nid=-1 from=0x0000000000000000 max_addr=0x0000000000000000 start_kernel+0xa4/0x568 [ 0.000000] memblock_reserve: [0x000000023f09a3c0-0x000000023f09a991] memblock_alloc_range_nid+0xc0/0x188 Signed-off-by: Faiyaz Mohammed Signed-off-by: Mike Rapoport --- include/linux/memblock.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ef131255cedc..b93c44b9121e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -404,13 +404,13 @@ void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -static inline void * __init memblock_alloc(phys_addr_t size, phys_addr_t align) +static __always_inline void *memblock_alloc(phys_addr_t size, phys_addr_t align) { return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_raw(phys_addr_t size, +static inline void *memblock_alloc_raw(phys_addr_t size, phys_addr_t align) { return memblock_alloc_try_nid_raw(size, align, MEMBLOCK_LOW_LIMIT, @@ -418,7 +418,7 @@ static inline void * __init memblock_alloc_raw(phys_addr_t size, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_from(phys_addr_t size, +static inline void *memblock_alloc_from(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) { @@ -426,33 +426,33 @@ static inline void * __init memblock_alloc_from(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_low(phys_addr_t size, +static inline void *memblock_alloc_low(phys_addr_t size, phys_addr_t align) { return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_node(phys_addr_t size, +static inline void *memblock_alloc_node(phys_addr_t size, phys_addr_t align, int nid) { return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } -static inline void __init memblock_free_early(phys_addr_t base, +static inline void memblock_free_early(phys_addr_t base, phys_addr_t size) { memblock_free(base, size); } -static inline void __init memblock_free_early_nid(phys_addr_t base, +static inline void memblock_free_early_nid(phys_addr_t base, phys_addr_t size, int nid) { memblock_free(base, size); } -static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) +static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) { __memblock_free_late(base, size); } @@ -460,7 +460,7 @@ static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) /* * Set the allocation direction to bottom-up or top-down. */ -static inline void __init memblock_set_bottom_up(bool enable) +static inline void memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } -- cgit v1.2.3 From 1f90f6a835514cb69bfede0b2752b0cb7a351bbd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Nov 2020 22:45:05 +0200 Subject: resource: Group resource_overlaps() with other inline helpers For better maintenance group resource_overlaps() with other inline helpers. While at it, drop extra parentheses. Signed-off-by: Andy Shevchenko Reviewed-by: Hanjun Guo Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5135d4b86cd6..df4581107536 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -229,6 +229,11 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2) return r1->start <= r2->start && r1->end >= r2->end; } +/* True if any part of r1 overlaps r2 */ +static inline bool resource_overlaps(struct resource *r1, struct resource *r2) +{ + return r1->start <= r2->end && r1->end >= r2->start; +} /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) @@ -296,12 +301,6 @@ extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); -/* True if any part of r1 overlaps r2 */ -static inline bool resource_overlaps(struct resource *r1, struct resource *r2) -{ - return (r1->start <= r2->end && r1->end >= r2->start); -} - struct resource *devm_request_free_mem_region(struct device *dev, struct resource *base, unsigned long size); struct resource *request_free_mem_region(struct resource *base, -- cgit v1.2.3 From 5562f35d7feabfd68cd58a1ee28b451f90e82417 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Nov 2020 22:45:06 +0200 Subject: resource: Introduce resource_union() for overlapping resources Some already present users may utilize resource_union() helper. Provide it for them and for wider use in the future. Signed-off-by: Andy Shevchenko Reviewed-by: Hanjun Guo Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index df4581107536..40320eb5bc0e 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -10,9 +10,10 @@ #define _LINUX_IOPORT_H #ifndef __ASSEMBLY__ +#include #include +#include #include -#include /* * Resources are tree-like, allowing * nesting etc.. @@ -235,6 +236,16 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) return r1->start <= r2->end && r1->end >= r2->start; } +static inline bool +resource_union(struct resource *r1, struct resource *r2, struct resource *r) +{ + if (!resource_overlaps(r1, r2)) + return false; + r->start = min(r1->start, r2->start); + r->end = max(r1->end, r2->end); + return true; +} + /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) -- cgit v1.2.3 From f65674df1b23cdcb6f656a14f659ffea83e7acaa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Nov 2020 22:45:07 +0200 Subject: resource: Introduce resource_intersection() for overlapping resources There will be at least one user that can utilize new helper. Provide the helper for future user and for wider use. Signed-off-by: Andy Shevchenko Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 40320eb5bc0e..ece1a8db309c 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -236,6 +236,16 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) return r1->start <= r2->end && r1->end >= r2->start; } +static inline bool +resource_intersection(struct resource *r1, struct resource *r2, struct resource *r) +{ + if (!resource_overlaps(r1, r2)) + return false; + r->start = max(r1->start, r2->start); + r->end = min(r1->end, r2->end); + return true; +} + static inline bool resource_union(struct resource *r1, struct resource *r2, struct resource *r) { -- cgit v1.2.3 From 97f53a08cba128a724ebbbf34778d3553d559816 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Nov 2020 13:21:08 -0800 Subject: net: linux/skbuff.h: combine SKB_EXTENSIONS + KCOV handling The previous Kconfig patch led to some other build errors as reported by the 0day bot and my own overnight build testing. These are all in when KCOV is enabled but SKB_EXTENSIONS is not enabled, so fix those by combining those conditions in the header file. Fixes: 6370cc3bbd8a ("net: add kcov handle to skb extensions") Fixes: 85ce50d337d1 ("net: kcov: don't select SKB_EXTENSIONS when there is no NET") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aleksandr Nogikh Cc: Willem de Bruijn Acked-by: Florian Westphal Link: https://lore.kernel.org/r/20201116212108.32465-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2d01b2bbb746..0a1239819fd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4608,7 +4608,7 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } -#ifdef CONFIG_KCOV +#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { @@ -4636,7 +4636,7 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { } static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } -#endif /* CONFIG_KCOV */ +#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 172292be01dbd6c26aba23f62e8ec090f31cdb71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:41 +0100 Subject: dma-mapping: remove dma_virt_ops Now that the RDMA core deals with devices that only do DMA mapping in lower layers properly, there is no user for dma_virt_ops and it can be removed. Link: https://lore.kernel.org/r/20201106181941.1878556-11-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..2aaed35b556d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -565,6 +565,4 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); -extern const struct dma_map_ops dma_virt_ops; - #endif /* _LINUX_DMA_MAPPING_H */ -- cgit v1.2.3 From f73659192b0bdf7bad826587b3530cef43cc048d Mon Sep 17 00:00:00 2001 From: Xie He Date: Sat, 14 Nov 2020 07:09:21 -0800 Subject: net: wan: Delete the DLCI / SDLA drivers The DLCI driver (dlci.c) implements the Frame Relay protocol. However, we already have another newer and better implementation of Frame Relay provided by the HDLC_FR driver (hdlc_fr.c). The DLCI driver's implementation of Frame Relay is used by only one hardware driver in the kernel - the SDLA driver (sdla.c). The SDLA driver provides Frame Relay support for the Sangoma S50x devices. However, the vendor provides their own driver (along with their own multi-WAN-protocol implementations including Frame Relay), called WANPIPE. I believe most users of the hardware would use the vendor-provided WANPIPE driver instead. (The WANPIPE driver was even once in the kernel, but was deleted in commit 8db60bcf3021 ("[WAN]: Remove broken and unmaintained Sangoma drivers.") because the vendor no longer updated the in-kernel WANPIPE driver.) Cc: Mike McLagan Signed-off-by: Xie He Link: https://lore.kernel.org/r/20201114150921.685594-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/if_frad.h | 92 ------------------- include/linux/sdla.h | 240 ------------------------------------------------ 2 files changed, 332 deletions(-) delete mode 100644 include/linux/if_frad.h delete mode 100644 include/linux/sdla.h (limited to 'include/linux') diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h deleted file mode 100644 index 52224de798aa..000000000000 --- a/include/linux/if_frad.h +++ /dev/null @@ -1,92 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * DLCI/FRAD Definitions for Frame Relay Access Devices. DLCI devices are - * created for each DLCI associated with a FRAD. The FRAD driver - * is not truly a network device, but the lower level device - * handler. This allows other FRAD manufacturers to use the DLCI - * code, including its RFC1490 encapsulation alongside the current - * implementation for the Sangoma cards. - * - * Version: @(#)if_ifrad.h 0.15 31 Mar 96 - * - * Author: Mike McLagan - * - * Changes: - * 0.15 Mike McLagan changed structure defs (packed) - * re-arranged flags - * added DLCI_RET vars - */ -#ifndef _FRAD_H_ -#define _FRAD_H_ - -#include - - -#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE) - -/* these are the fields of an RFC 1490 header */ -struct frhdr -{ - unsigned char control; - - /* for IP packets, this can be the NLPID */ - unsigned char pad; - - unsigned char NLPID; - unsigned char OUI[3]; - __be16 PID; - -#define IP_NLPID pad -} __packed; - -/* see RFC 1490 for the definition of the following */ -#define FRAD_I_UI 0x03 - -#define FRAD_P_PADDING 0x00 -#define FRAD_P_Q933 0x08 -#define FRAD_P_SNAP 0x80 -#define FRAD_P_CLNP 0x81 -#define FRAD_P_IP 0xCC - -struct dlci_local -{ - struct net_device *master; - struct net_device *slave; - struct dlci_conf config; - int configured; - struct list_head list; - - /* callback function */ - void (*receive)(struct sk_buff *skb, struct net_device *); -}; - -struct frad_local -{ - /* devices which this FRAD is slaved to */ - struct net_device *master[CONFIG_DLCI_MAX]; - short dlci[CONFIG_DLCI_MAX]; - - struct frad_conf config; - int configured; /* has this device been configured */ - int initialized; /* mem_start, port, irq set ? */ - - /* callback functions */ - int (*activate)(struct net_device *, struct net_device *); - int (*deactivate)(struct net_device *, struct net_device *); - int (*assoc)(struct net_device *, struct net_device *); - int (*deassoc)(struct net_device *, struct net_device *); - int (*dlci_conf)(struct net_device *, struct net_device *, int get); - - /* fields that are used by the Sangoma SDLA cards */ - struct timer_list timer; - struct net_device *dev; - int type; /* adapter type */ - int state; /* state of the S502/8 control latch */ - int buffer; /* current buffer for S508 firmware */ -}; - -#endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ - -extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); - -#endif diff --git a/include/linux/sdla.h b/include/linux/sdla.h deleted file mode 100644 index 00e8b3b614f0..000000000000 --- a/include/linux/sdla.h +++ /dev/null @@ -1,240 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Global definitions for the Frame relay interface. - * - * Version: @(#)if_ifrad.h 0.20 13 Apr 96 - * - * Author: Mike McLagan - * - * Changes: - * 0.15 Mike McLagan Structure packing - * - * 0.20 Mike McLagan New flags for S508 buffer handling - */ -#ifndef SDLA_H -#define SDLA_H - -#include - - -/* important Z80 window addresses */ -#define SDLA_CONTROL_WND 0xE000 - -#define SDLA_502_CMD_BUF 0xEF60 -#define SDLA_502_RCV_BUF 0xA900 -#define SDLA_502_TXN_AVAIL 0xFFF1 -#define SDLA_502_RCV_AVAIL 0xFFF2 -#define SDLA_502_EVENT_FLAGS 0xFFF3 -#define SDLA_502_MDM_STATUS 0xFFF4 -#define SDLA_502_IRQ_INTERFACE 0xFFFD -#define SDLA_502_IRQ_PERMISSION 0xFFFE -#define SDLA_502_DATA_OFS 0x0010 - -#define SDLA_508_CMD_BUF 0xE000 -#define SDLA_508_TXBUF_INFO 0xF100 -#define SDLA_508_RXBUF_INFO 0xF120 -#define SDLA_508_EVENT_FLAGS 0xF003 -#define SDLA_508_MDM_STATUS 0xF004 -#define SDLA_508_IRQ_INTERFACE 0xF010 -#define SDLA_508_IRQ_PERMISSION 0xF011 -#define SDLA_508_TSE_OFFSET 0xF012 - -/* Event flags */ -#define SDLA_EVENT_STATUS 0x01 -#define SDLA_EVENT_DLCI_STATUS 0x02 -#define SDLA_EVENT_BAD_DLCI 0x04 -#define SDLA_EVENT_LINK_DOWN 0x40 - -/* IRQ Trigger flags */ -#define SDLA_INTR_RX 0x01 -#define SDLA_INTR_TX 0x02 -#define SDLA_INTR_MODEM 0x04 -#define SDLA_INTR_COMPLETE 0x08 -#define SDLA_INTR_STATUS 0x10 -#define SDLA_INTR_TIMER 0x20 - -/* DLCI status bits */ -#define SDLA_DLCI_DELETED 0x01 -#define SDLA_DLCI_ACTIVE 0x02 -#define SDLA_DLCI_WAITING 0x04 -#define SDLA_DLCI_NEW 0x08 -#define SDLA_DLCI_INCLUDED 0x40 - -/* valid command codes */ -#define SDLA_INFORMATION_WRITE 0x01 -#define SDLA_INFORMATION_READ 0x02 -#define SDLA_ISSUE_IN_CHANNEL_SIGNAL 0x03 -#define SDLA_SET_DLCI_CONFIGURATION 0x10 -#define SDLA_READ_DLCI_CONFIGURATION 0x11 -#define SDLA_DISABLE_COMMUNICATIONS 0x12 -#define SDLA_ENABLE_COMMUNICATIONS 0x13 -#define SDLA_READ_DLC_STATUS 0x14 -#define SDLA_READ_DLC_STATISTICS 0x15 -#define SDLA_FLUSH_DLC_STATISTICS 0x16 -#define SDLA_LIST_ACTIVE_DLCI 0x17 -#define SDLA_FLUSH_INFORMATION_BUFFERS 0x18 -#define SDLA_ADD_DLCI 0x20 -#define SDLA_DELETE_DLCI 0x21 -#define SDLA_ACTIVATE_DLCI 0x22 -#define SDLA_DEACTIVATE_DLCI 0x23 -#define SDLA_READ_MODEM_STATUS 0x30 -#define SDLA_SET_MODEM_STATUS 0x31 -#define SDLA_READ_COMMS_ERR_STATS 0x32 -#define SDLA_FLUSH_COMMS_ERR_STATS 0x33 -#define SDLA_READ_CODE_VERSION 0x40 -#define SDLA_SET_IRQ_TRIGGER 0x50 -#define SDLA_GET_IRQ_TRIGGER 0x51 - -/* In channel signal types */ -#define SDLA_ICS_LINK_VERIFY 0x02 -#define SDLA_ICS_STATUS_ENQ 0x03 - -/* modem status flags */ -#define SDLA_MODEM_DTR_HIGH 0x01 -#define SDLA_MODEM_RTS_HIGH 0x02 -#define SDLA_MODEM_DCD_HIGH 0x08 -#define SDLA_MODEM_CTS_HIGH 0x20 - -/* used for RET_MODEM interpretation */ -#define SDLA_MODEM_DCD_LOW 0x01 -#define SDLA_MODEM_CTS_LOW 0x02 - -/* return codes */ -#define SDLA_RET_OK 0x00 -#define SDLA_RET_COMMUNICATIONS 0x01 -#define SDLA_RET_CHANNEL_INACTIVE 0x02 -#define SDLA_RET_DLCI_INACTIVE 0x03 -#define SDLA_RET_DLCI_CONFIG 0x04 -#define SDLA_RET_BUF_TOO_BIG 0x05 -#define SDLA_RET_NO_DATA 0x05 -#define SDLA_RET_BUF_OVERSIZE 0x06 -#define SDLA_RET_CIR_OVERFLOW 0x07 -#define SDLA_RET_NO_BUFS 0x08 -#define SDLA_RET_TIMEOUT 0x0A -#define SDLA_RET_MODEM 0x10 -#define SDLA_RET_CHANNEL_OFF 0x11 -#define SDLA_RET_CHANNEL_ON 0x12 -#define SDLA_RET_DLCI_STATUS 0x13 -#define SDLA_RET_DLCI_UNKNOWN 0x14 -#define SDLA_RET_COMMAND_INVALID 0x1F - -/* Configuration flags */ -#define SDLA_DIRECT_RECV 0x0080 -#define SDLA_TX_NO_EXCEPT 0x0020 -#define SDLA_NO_ICF_MSGS 0x1000 -#define SDLA_TX50_RX50 0x0000 -#define SDLA_TX70_RX30 0x2000 -#define SDLA_TX30_RX70 0x4000 - -/* IRQ selection flags */ -#define SDLA_IRQ_RECEIVE 0x01 -#define SDLA_IRQ_TRANSMIT 0x02 -#define SDLA_IRQ_MODEM_STAT 0x04 -#define SDLA_IRQ_COMMAND 0x08 -#define SDLA_IRQ_CHANNEL 0x10 -#define SDLA_IRQ_TIMER 0x20 - -/* definitions for PC memory mapping */ -#define SDLA_8K_WINDOW 0x01 -#define SDLA_S502_SEG_A 0x10 -#define SDLA_S502_SEG_C 0x20 -#define SDLA_S502_SEG_D 0x00 -#define SDLA_S502_SEG_E 0x30 -#define SDLA_S507_SEG_A 0x00 -#define SDLA_S507_SEG_B 0x40 -#define SDLA_S507_SEG_C 0x80 -#define SDLA_S507_SEG_E 0xC0 -#define SDLA_S508_SEG_A 0x00 -#define SDLA_S508_SEG_C 0x10 -#define SDLA_S508_SEG_D 0x08 -#define SDLA_S508_SEG_E 0x18 - -/* SDLA adapter port constants */ -#define SDLA_IO_EXTENTS 0x04 - -#define SDLA_REG_CONTROL 0x00 -#define SDLA_REG_PC_WINDOW 0x01 /* offset for PC window select latch */ -#define SDLA_REG_Z80_WINDOW 0x02 /* offset for Z80 window select latch */ -#define SDLA_REG_Z80_CONTROL 0x03 /* offset for Z80 control latch */ - -#define SDLA_S502_STS 0x00 /* status reg for 502, 502E, 507 */ -#define SDLA_S508_GNRL 0x00 /* general purp. reg for 508 */ -#define SDLA_S508_STS 0x01 /* status reg for 508 */ -#define SDLA_S508_IDR 0x02 /* ID reg for 508 */ - -/* control register flags */ -#define SDLA_S502A_START 0x00 /* start the CPU */ -#define SDLA_S502A_INTREQ 0x02 -#define SDLA_S502A_INTEN 0x04 -#define SDLA_S502A_HALT 0x08 /* halt the CPU */ -#define SDLA_S502A_NMI 0x10 /* issue an NMI to the CPU */ - -#define SDLA_S502E_CPUEN 0x01 -#define SDLA_S502E_ENABLE 0x02 -#define SDLA_S502E_INTACK 0x04 - -#define SDLA_S507_ENABLE 0x01 -#define SDLA_S507_IRQ3 0x00 -#define SDLA_S507_IRQ4 0x20 -#define SDLA_S507_IRQ5 0x40 -#define SDLA_S507_IRQ7 0x60 -#define SDLA_S507_IRQ10 0x80 -#define SDLA_S507_IRQ11 0xA0 -#define SDLA_S507_IRQ12 0xC0 -#define SDLA_S507_IRQ15 0xE0 - -#define SDLA_HALT 0x00 -#define SDLA_CPUEN 0x02 -#define SDLA_MEMEN 0x04 -#define SDLA_S507_EPROMWR 0x08 -#define SDLA_S507_EPROMCLK 0x10 -#define SDLA_S508_INTRQ 0x08 -#define SDLA_S508_INTEN 0x10 - -struct sdla_cmd { - char opp_flag; - char cmd; - short length; - char retval; - short dlci; - char flags; - short rxlost_int; - long rxlost_app; - char reserve[2]; - char data[SDLA_MAX_DATA]; /* transfer data buffer */ -} __attribute__((packed)); - -struct intr_info { - char flags; - short txlen; - char irq; - char flags2; - short timeout; -} __attribute__((packed)); - -/* found in the 508's control window at RXBUF_INFO */ -struct buf_info { - unsigned short rse_num; - unsigned long rse_base; - unsigned long rse_next; - unsigned long buf_base; - unsigned short reserved; - unsigned long buf_top; -} __attribute__((packed)); - -/* structure pointed to by rse_base in RXBUF_INFO struct */ -struct buf_entry { - char opp_flag; - short length; - short dlci; - char flags; - short timestamp; - short reserved[2]; - long buf_addr; -} __attribute__((packed)); - -#endif -- cgit v1.2.3 From 270f3385cddf5db3799b393459476bd9abff89f1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:17:59 +0100 Subject: net: core: fix some kernel-doc markups Some identifiers have different names between their prototypes and the kernel-doc markup. In the specific case of netif_subqueue_stopped(), keep the current markup for __netif_subqueue_stopped(), adding a new one for netif_subqueue_stopped(). Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ce648a564f7..03433a4c929e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1490,7 +1490,7 @@ struct net_device_ops { }; /** - * enum net_device_priv_flags - &struct net_device priv_flags + * enum netdev_priv_flags - &struct net_device priv_flags * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to @@ -3602,7 +3602,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) } /** - * netif_subqueue_stopped - test status of subqueue + * __netif_subqueue_stopped - test status of subqueue * @dev: network device * @queue_index: sub queue index * @@ -3616,6 +3616,13 @@ static inline bool __netif_subqueue_stopped(const struct net_device *dev, return netif_tx_queue_stopped(txq); } +/** + * netif_subqueue_stopped - test status of subqueue + * @dev: network device + * @skb: sub queue buffer pointer + * + * Check individual transmit queue of a device with multiple transmit queues. + */ static inline bool netif_subqueue_stopped(const struct net_device *dev, struct sk_buff *skb) { -- cgit v1.2.3 From ed5298c7d500abaf34ed7783969e953a1f028e5b Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 28 Sep 2020 09:39:50 +0530 Subject: bus: mhi: Remove auto-start option There is really no point having an auto-start for channels. This is confusing for the device drivers, some have to enable the channels, others don't have... and waste resources (e.g. pre allocated buffers) that may never be used. This is really up to the MHI device(channel) driver to manage the state of its channels. While at it, let's also remove the auto-start option from ath11k mhi controller. Signed-off-by: Loic Poulain Acked-by: Kalle Valo Reviewed-by: Manivannan Sadhasivam [mani: clubbed ath11k change] Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d4841e5a5f45..6522a4adc794 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -214,7 +214,6 @@ enum mhi_db_brst_mode { * @offload_channel: The client manages the channel completely * @doorbell_mode_switch: Channel switches to doorbell mode on M0 transition * @auto_queue: Framework will automatically queue buffers for DL traffic - * @auto_start: Automatically start (open) this channel * @wake-capable: Channel capable of waking up the system */ struct mhi_channel_config { @@ -232,7 +231,6 @@ struct mhi_channel_config { bool offload_channel; bool doorbell_mode_switch; bool auto_queue; - bool auto_start; bool wake_capable; }; -- cgit v1.2.3 From 16fee29b07358293f135759d9fdbf1267da57ebd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 17:02:17 +0100 Subject: dma-mapping: remove the dma_direct_set_offset export Drop the dma_direct_set_offset export and move the declaration to dma-map-ops.h now that the Allwinner drivers have stopped calling it. Signed-off-by: Christoph Hellwig Signed-off-by: Maxime Ripard --- include/linux/dma-map-ops.h | 3 +++ include/linux/dma-mapping.h | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index a5f89fc4d6df..03925e438ec3 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -226,6 +226,9 @@ struct page *dma_alloc_from_pool(struct device *dev, size_t size, bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)); bool dma_free_from_pool(struct device *dev, void *start, size_t size); +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size); + #ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H #include #elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..199d85285246 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -558,13 +558,6 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif -/* - * Legacy interface to set up the dma offset map. Drivers really should not - * actually use it, but we have a few legacy cases left. - */ -int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, - dma_addr_t dma_start, u64 size); - extern const struct dma_map_ops dma_virt_ops; #endif /* _LINUX_DMA_MAPPING_H */ -- cgit v1.2.3 From 78e1d22687ff1fd320abac12e8a607ea79782c48 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 6 Nov 2020 09:44:47 -0800 Subject: bus: mhi: core: Expose mhi_get_exec_env() API for controllers The mhi_get_exec_env() APIs can be used by the controller drivers to query the execution environment of the MHI device. Expose it so it can be used in some scenarios to determine behavior of controllers. Signed-off-by: Bhaumik Bhatt Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d4841e5a5f45..9225d5551d69 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -658,6 +658,12 @@ int mhi_download_rddm_img(struct mhi_controller *mhi_cntrl, bool in_panic); */ int mhi_force_rddm_mode(struct mhi_controller *mhi_cntrl); +/** + * mhi_get_exec_env - Get BHI execution environment of the device + * @mhi_cntrl: MHI controller + */ +enum mhi_ee_type mhi_get_exec_env(struct mhi_controller *mhi_cntrl); + /** * mhi_get_mhi_state - Get MHI state of the device * @mhi_cntrl: MHI controller -- cgit v1.2.3 From 9e1660e5c396ee081907386d0b95b8e0804a6c86 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 6 Nov 2020 09:44:49 -0800 Subject: bus: mhi: core: Rename RDDM download function to use proper words mhi_download_rddm_img() uses a shorter version of the word image. Expand it and rename the function to mhi_download_rddm_image(). Signed-off-by: Bhaumik Bhatt Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 9225d5551d69..52b3c60bf9bb 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -645,12 +645,12 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl); int mhi_pm_resume(struct mhi_controller *mhi_cntrl); /** - * mhi_download_rddm_img - Download ramdump image from device for - * debugging purpose. + * mhi_download_rddm_image - Download ramdump image from device for + * debugging purpose. * @mhi_cntrl: MHI controller * @in_panic: Download rddm image during kernel panic */ -int mhi_download_rddm_img(struct mhi_controller *mhi_cntrl, bool in_panic); +int mhi_download_rddm_image(struct mhi_controller *mhi_cntrl, bool in_panic); /** * mhi_force_rddm_mode - Force device into rddm mode -- cgit v1.2.3 From 8f70397876872789b2a5deba804eb6216fb5deb7 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Mon, 9 Nov 2020 12:47:21 -0800 Subject: bus: mhi: core: Move to using high priority workqueue MHI work is currently scheduled on the global/system workqueue and can encounter delays on a stressed system. To avoid those unforeseen delays which can hamper bootup or shutdown times, use a dedicated high priority workqueue instead of the global/system workqueue. Signed-off-by: Bhaumik Bhatt Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 52b3c60bf9bb..1ed5f2aa224b 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -337,6 +337,7 @@ struct mhi_controller_config { * @wlock: Lock for protecting device wakeup * @mhi_link_info: Device bandwidth info * @st_worker: State transition worker + * @hiprio_wq: High priority workqueue for MHI work such as state transitions * @state_event: State change event * @status_cb: CB function to notify power states of the device (required) * @wake_get: CB function to assert device wake (optional) @@ -419,6 +420,7 @@ struct mhi_controller { spinlock_t wlock; struct mhi_link_info mhi_link_info; struct work_struct st_worker; + struct workqueue_struct *hiprio_wq; wait_queue_head_t state_event; void (*status_cb)(struct mhi_controller *mhi_cntrl, -- cgit v1.2.3 From 5c62634fc65101d350cbd47722fb76f02693059d Mon Sep 17 00:00:00 2001 From: Hui Su Date: Wed, 18 Nov 2020 00:17:50 +0800 Subject: namespace: make timens_on_fork() return nothing timens_on_fork() always return 0, and maybe not need to judge the return value in copy_namespaces(). So make timens_on_fork() return nothing and do not judge its return val in copy_namespaces(). Signed-off-by: Hui Su Link: https://lore.kernel.org/r/20201117161750.GA45121@rlk Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 68770ac9ba89..30312166e70a 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -45,7 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags, struct user_namespace *user_ns, struct time_namespace *old_ns); void free_time_ns(struct kref *kref); -int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); +void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); struct vdso_data *arch_get_vdso_data(void *vvar_page); static inline void put_time_ns(struct time_namespace *ns) @@ -136,10 +136,10 @@ struct time_namespace *copy_time_ns(unsigned long flags, return old_ns; } -static inline int timens_on_fork(struct nsproxy *nsproxy, +static inline void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) { - return 0; + return; } static inline void timens_add_monotonic(struct timespec64 *ts) { } -- cgit v1.2.3 From 13d40ff85da8a85935e3fd47061dee71a02af0d4 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Fri, 13 Nov 2020 12:24:56 -0800 Subject: usb: typec: Correct the bit values for the Thunderbolt rounded/non-rounded cable support Rounded and non-rounded Thunderbolt cables are represented by two bits as per USB Type-C Connector specification v2.0 section F.2.6. Corrected that in the Thunderbolt 3 cable discover mode VDO. Signed-off-by: Utkarsh Patel Reviewed-by: Heikki Krogerus -- Changes in v2: - Removed the fixes tag as there is no functional implication. -- Link: https://lore.kernel.org/r/20201113202503.6559-2-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_tbt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index 47c2d501ddce..aad648d14bb3 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -40,11 +40,16 @@ struct typec_thunderbolt_data { #define TBT_CABLE_USB3_PASSIVE 2 #define TBT_CABLE_10_AND_20GBPS 3 #define TBT_CABLE_ROUNDED BIT(19) +#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \ + (((_vdo_) & GENMASK(20, 19)) >> 19) +#define TBT_GEN3_NON_ROUNDED 0 +#define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1 #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) #define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) +#define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19) /* TBT3 Device Enter Mode VDO bits */ #define TBT_ENTER_MODE_CABLE_SPEED(s) TBT_SET_CABLE_SPEED(s) -- cgit v1.2.3 From 523a97aa3b756c1e0efe33ae48d450f8b0052073 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Fri, 13 Nov 2020 12:24:59 -0800 Subject: usb: typec: Remove one bit support for the Thunderbolt rounded/non-rounded cable Two bits support for the Thunderbolt rounded/non-rounded cable has been added to the header file. Hence, removing unused TBT_CABLE_ROUNDED definition from the header file. Signed-off-by: Utkarsh Patel Reviewed-by: Heikki Krogerus -- changes in v2: - Removed the fixes tag as there is no functional implication. -- Link: https://lore.kernel.org/r/20201113202503.6559-5-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_tbt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index aad648d14bb3..63dd44b72e0c 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -39,7 +39,6 @@ struct typec_thunderbolt_data { #define TBT_CABLE_USB3_GEN1 1 #define TBT_CABLE_USB3_PASSIVE 2 #define TBT_CABLE_10_AND_20GBPS 3 -#define TBT_CABLE_ROUNDED BIT(19) #define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \ (((_vdo_) & GENMASK(20, 19)) >> 19) #define TBT_GEN3_NON_ROUNDED 0 -- cgit v1.2.3 From 8a5ca78f603977d6b9b2d7dcb3b1b6ef601d3cc7 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 16 Nov 2020 12:11:38 -0800 Subject: usb: pd: Add captive Type C cable type The USB Power Delivery Specification R3.0 adds a captive cable type to the "USB Type-C plug to USB Type-C/Captive" field (Bits 19-18, Passive/Active Cable VDO, Table 6-38 & 6-39). Add the corresponding definition to the Cable VDO header. Also add a helper macro to get the Type C cable connector type, when provided the cable VDO. Cc: Heikki Krogerus Signed-off-by: Prashant Malani Reviewed-by: Benson Leung Reviewed-by: Greg Kroah-Hartman Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201116201150.2919178-2-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_vdo.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index 68bdc4e2f5a9..8c5cb5830754 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -177,7 +177,7 @@ * <31:28> :: Cable HW version * <27:24> :: Cable FW version * <23:20> :: Reserved, Shall be set to zero - * <19:18> :: type-C to Type-A/B/C (00b == A, 01 == B, 10 == C) + * <19:18> :: type-C to Type-A/B/C/Captive (00b == A, 01 == B, 10 == C, 11 == Captive) * <17> :: Type-C to Plug/Receptacle (0b == plug, 1b == receptacle) * <16:13> :: cable latency (0001 == <10ns(~1m length)) * <12:11> :: cable termination type (11b == both ends active VCONN req) @@ -193,6 +193,7 @@ #define CABLE_ATYPE 0 #define CABLE_BTYPE 1 #define CABLE_CTYPE 2 +#define CABLE_CAPTIVE 3 #define CABLE_PLUG 0 #define CABLE_RECEPTACLE 1 #define CABLE_CURR_1A5 0 @@ -208,6 +209,7 @@ | (tx1d) << 10 | (tx2d) << 9 | (rx1d) << 8 | (rx2d) << 7 \ | ((cur) & 0x3) << 5 | (vps) << 4 | (sopp) << 3 \ | ((usbss) & 0x7)) +#define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3) /* * AMA VDO -- cgit v1.2.3 From a0ccdc4a77a1b36b682ae60361879eca0a0f88d6 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 16 Nov 2020 12:11:40 -0800 Subject: usb: typec: Add number of altmodes partner attr Add a user-visible attribute for the number of alternate modes available in a partner. This allows userspace to determine whether there are any remaining alternate modes left to be registered by the kernel driver. It can begin executing any policy state machine after all available alternate modes have been registered with the connector class framework. This value is set to "-1" initially, signifying that a valid number of alternate modes haven't been set for the partner. Also add a sysfs file which exposes this attribute. The file remains hidden as long as the attribute value is -1. Cc: Benson Leung Cc: Heikki Krogerus Signed-off-by: Prashant Malani Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201116201150.2919178-3-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 6be558045942..bc6b1a71cb8a 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -126,6 +126,7 @@ struct typec_altmode_desc { enum typec_port_data roles; }; +int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmodes); struct typec_altmode *typec_partner_register_altmode(struct typec_partner *partner, const struct typec_altmode_desc *desc); -- cgit v1.2.3 From a30a00e37ceb094f949e4d96c2c586e6503b5d1d Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 28 Oct 2020 23:31:32 -0700 Subject: usb: typec: tcpm: frs sourcing vbus callback During FRS hardware autonomously starts to source vbus. Provide callback to perform chip specific operations. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201029063138.1429760-5-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 09762d26fa0c..7303f518ba49 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -83,6 +83,9 @@ enum tcpm_transmit_type { * Optional; Called to enable/disable PD 3.0 fast role swap. * Enabling frs is accessory dependent as not all PD3.0 * accessories support fast role swap. + * @frs_sourcing_vbus: + * Optional; Called to notify that vbus is now being sourced. + * Low level drivers can perform chip specific operations, if any. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -109,6 +112,7 @@ struct tcpc_dev { const struct pd_message *msg); int (*set_bist_data)(struct tcpc_dev *dev, bool on); int (*enable_frs)(struct tcpc_dev *dev, bool enable); + void (*frs_sourcing_vbus)(struct tcpc_dev *dev); }; struct tcpm_port; -- cgit v1.2.3 From f321a02caebdd0c56e167610cda2fa148cd96e8b Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 28 Oct 2020 23:31:35 -0700 Subject: usb: typec: tcpm: Implement enabling Auto Discharge disconnect support TCPCI spec allows TCPC hardware to autonomously discharge the vbus capacitance upon disconnect. The expectation is that the TCPM enables AutoDischargeDisconnect while entering SNK/SRC_ATTACHED states. Hardware then automously discharges vbus when the vbus falls below a certain threshold i.e. VBUS_SINK_DISCONNECT_THRESHOLD. Apart from enabling the vbus discharge circuit, AutoDischargeDisconnect is also used a flag to move TCPCI based TCPC implementations into Attached.Snk/Attached.Src state as mentioned in Figure 4-15. TCPC State Diagram before a Connection of the USB Type-C Port Controller Interface Specification. In such TCPC implementations, setting AutoDischargeDisconnect would prevent TCPC into entering "Connection_Invalid" state as well. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201029063138.1429760-8-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 7303f518ba49..e68aaa12886f 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -86,6 +86,18 @@ enum tcpm_transmit_type { * @frs_sourcing_vbus: * Optional; Called to notify that vbus is now being sourced. * Low level drivers can perform chip specific operations, if any. + * @enable_auto_vbus_discharge: + * Optional; TCPCI spec based TCPC implementations can optionally + * support hardware to autonomously dischrge vbus upon disconnecting + * as sink or source. TCPM signals TCPC to enable the mechanism upon + * entering connected state and signals disabling upon disconnect. + * @set_auto_vbus_discharge_threshold: + * Mandatory when enable_auto_vbus_discharge is implemented. TCPM + * calls this function to allow lower levels drivers to program the + * vbus threshold voltage below which the vbus discharge circuit + * will be turned on. requested_vbus_voltage is set to 0 when vbus + * is going to disappear knowingly i.e. during PR_SWAP and + * HARD_RESET etc. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -113,6 +125,9 @@ struct tcpc_dev { int (*set_bist_data)(struct tcpc_dev *dev, bool on); int (*enable_frs)(struct tcpc_dev *dev, bool enable); void (*frs_sourcing_vbus)(struct tcpc_dev *dev); + int (*enable_auto_vbus_discharge)(struct tcpc_dev *dev, bool enable); + int (*set_auto_vbus_discharge_threshold)(struct tcpc_dev *dev, enum typec_pwr_opmode mode, + bool pps_active, u32 requested_vbus_voltage); }; struct tcpm_port; -- cgit v1.2.3 From e1e52361c61afdf81d81cfbbfa3ce08971e60f50 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Mon, 16 Nov 2020 12:11:42 -0800 Subject: usb: typec: Add plug num_altmodes sysfs attr Add a field to the typec_plug struct to record the number of available altmodes as well as the corresponding sysfs attribute to expose this to userspace. This allows userspace to determine whether there are any remaining alternate modes left to be registered by the kernel driver. It can begin executing any policy state machine after all available alternate modes have been registered with the connector class framework. This value is set to "-1" initially, signifying that a valid number of alternate modes haven't been set for the plug. The sysfs file remains hidden as long as the attribute value is -1. We re-use the partner attribute for number_of_alternate_modes since the usage and name is similar, and update the corresponding *_show() command to support both partner and plugs. Signed-off-by: Prashant Malani Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201116201150.2919178-4-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index bc6b1a71cb8a..54475323f83b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -130,6 +130,7 @@ int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmod struct typec_altmode *typec_partner_register_altmode(struct typec_partner *partner, const struct typec_altmode_desc *desc); +int typec_plug_set_num_altmodes(struct typec_plug *plug, int num_altmodes); struct typec_altmode *typec_plug_register_altmode(struct typec_plug *plug, const struct typec_altmode_desc *desc); -- cgit v1.2.3 From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 10 Nov 2020 15:19:08 +0800 Subject: iommu/vt-d: Avoid panic if iommu init fails in tboot system "intel_iommu=off" command line is used to disable iommu but iommu is force enabled in a tboot system for security reason. However for better performance on high speed network device, a new option "intel_iommu=tboot_noforce" is introduced to disable the force on. By default kernel should panic if iommu init fail in tboot for security reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off". Fix the code setting force_on and move intel_iommu_tboot_noforce from tboot code to intel iommu code. Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on") Signed-off-by: Zhenzhong Duan Tested-by: Lukasz Hawrylko Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com Signed-off-by: Will Deacon --- include/linux/intel-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fbf5b3e7707e..d956987ed032 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -- cgit v1.2.3 From d04a53b1c48766665806eb75b73137734abdaa95 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 17 Nov 2020 22:38:26 +0100 Subject: ptp: document struct ptp_clock_request members It's arguable most people interested in configuring a PPS signal want it as external output, not as kernel input. PTP_CLK_REQ_PPS is for input though. Add documentation to nudge readers into the correct direction. Signed-off-by: Ahmad Fatoum Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20201117213826.18235-1-a.fatoum@pengutronix.de Signed-off-by: Jakub Kicinski --- include/linux/ptp_clock_kernel.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index d3e8ba5c7125..0d47fd33b228 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -12,6 +12,19 @@ #include #include +/** + * struct ptp_clock_request - request PTP clock event + * + * @type: The type of the request. + * EXTTS: Configure external trigger timestamping + * PEROUT: Configure periodic output signal (e.g. PPS) + * PPS: trigger internal PPS event for input + * into kernel PPS subsystem + * @extts: describes configuration for external trigger timestamping. + * This is only valid when event == PTP_CLK_REQ_EXTTS. + * @perout: describes configuration for periodic output. + * This is only valid when event == PTP_CLK_REQ_PEROUT. + */ struct ptp_clock_request { enum { -- cgit v1.2.3 From d055126180564a57fe533728a4e93d0cb53d49b3 Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Tue, 17 Nov 2020 18:45:49 +0000 Subject: bpf: Add bpf_ktime_get_coarse_ns helper The helper uses CLOCK_MONOTONIC_COARSE source of time that is less accurate but more performant. We have a BPF CGROUP_SKB firewall that supports event logging through bpf_perf_event_output(). Each event has a timestamp and currently we use bpf_ktime_get_ns() for it. Use of bpf_ktime_get_coarse_ns() saves ~15-20 ns in time required for event logging. bpf_ktime_get_ns(): EgressLogByRemoteEndpoint 113.82ns 8.79M bpf_ktime_get_coarse_ns(): EgressLogByRemoteEndpoint 95.40ns 10.48M Signed-off-by: Dmitrii Banshchikov Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20201117184549.257280-1-me@ubique.spb.ru --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 581b2a2e78eb..e1bcb6d7345c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1842,6 +1842,7 @@ extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; +extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From f2bcc2fa275b913093ff5b403be5d11342fdb055 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 16 Nov 2020 17:21:15 +0100 Subject: atm: nicstar: Replace in_interrupt() usage push_scqe() uses in_interrupt() to figure out if it is allowed to sleep. The usage of in_interrupt() in drivers is phased out and Linus clearly requested that code which changes behaviour depending on context should either be separated or the context be conveyed in an argument passed by the caller, which usually knows the context. Aside of that in_interrupt() is not correct as it does not catch preempt disabled regions which neither can sleep. ns_send() (the only caller of push_scqe()) has the following callers: - vcc_sendmsg() used as proto_ops::sendmsg is expected to be invoked in preemtible context. -> vcc->dev->ops->send() (ns_send()) - atm_vcc::send via atmdev_ops::send either directly (pointer copied by atm_init_aal34() or atm_init_aal5()) or via atm_send_aal0(). This is invoked by drivers (like br2684, clip, pppoatm, ...) which are called from net_device_ops::ndo_start_xmit with BH disabled. Add atmdev_ops::send_bh which is used by callers from BH context (atm_send_aal*()) and if this callback missing then ::send is used instead. Implement this callback in nicstar and use it to replace in_interrupt(). Cc: Chas Williams <3chas3@gmail.com> Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 5d5ff2203fa2..d7493016cd46 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -186,6 +186,7 @@ struct atmdev_ops { /* only send is required */ void __user *arg); #endif int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); + int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); void (*phy_put)(struct atm_dev *dev,unsigned char value, unsigned long addr); -- cgit v1.2.3 From 4abb1e5b63ac3281275315fc6b0cde0b9c2e2e42 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 11 Nov 2020 15:53:17 +0100 Subject: powerpc/mm: factor out creating/removing linear mapping We want to stop abusing memory hotplug infrastructure in memtrace code to perform allocations and remove the linear mapping. Instead we will use alloc_contig_pages() and remove the linear mapping manually. Let's factor out creating/removing the linear mapping into arch_create_linear_mapping() / arch_remove_linear_mapping() - so in the future, we might be able to have whole arch_add_memory() / arch_remove_memory() be implemented in common code. Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201111145322.15793-4-david@redhat.com --- include/linux/memory_hotplug.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..00b9e9bd3850 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -375,6 +375,9 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, unsigned long nr_pages); +extern int arch_create_linear_mapping(int nid, u64 start, u64 size, + struct mhp_params *params); +void arch_remove_linear_mapping(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ -- cgit v1.2.3 From 86b9d170da98bae13b307d621638954aef645331 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 10 Nov 2020 17:13:37 +0100 Subject: mfd: syscon: Add syscon_regmap_lookup_by_phandle_optional() function. This adds syscon_regmap_lookup_by_phandle_optional() function to get an optional regmap. It behaves the same as syscon_regmap_lookup_by_phandle() except where there is no regmap phandle. In this case, instead of returning -ENODEV, the function returns NULL. This makes error checking simpler when the regmap phandle is optional. Suggested-by: Nicolas Boichat Signed-off-by: Enric Balletbo i Serra Reviewed-by: Matthias Brugger Reviewed-by: Arnd Bergmann Signed-off-by: Lee Jones --- include/linux/mfd/syscon.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index 7f20e9b502a5..fecc2fa2a364 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -28,6 +28,9 @@ extern struct regmap *syscon_regmap_lookup_by_phandle_args( const char *property, int arg_count, unsigned int *out_args); +extern struct regmap *syscon_regmap_lookup_by_phandle_optional( + struct device_node *np, + const char *property); #else static inline struct regmap *device_node_to_regmap(struct device_node *np) { @@ -59,6 +62,14 @@ static inline struct regmap *syscon_regmap_lookup_by_phandle_args( { return ERR_PTR(-ENOTSUPP); } + +static inline struct regmap *syscon_regmap_lookup_by_phandle_optional( + struct device_node *np, + const char *property) +{ + return NULL; +} + #endif #endif /* __LINUX_MFD_SYSCON_H__ */ -- cgit v1.2.3 From 68a90a6c6443b07036ae4a878f6d85bf141471fd Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 25 Sep 2020 10:14:46 +0100 Subject: mfd: madera: Delete register field xxx_WIDTH defines The register field xxx_WIDTH defines are not used in current code. Signed-off-by: Richard Fitzgerald Signed-off-by: Lee Jones --- include/linux/mfd/madera/registers.h | 635 ----------------------------------- 1 file changed, 635 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/madera/registers.h b/include/linux/mfd/madera/registers.h index fe909d177762..b44aeb461d0c 100644 --- a/include/linux/mfd/madera/registers.h +++ b/include/linux/mfd/madera/registers.h @@ -1286,566 +1286,438 @@ /* (0x0000) Software_Reset */ #define MADERA_SW_RST_DEV_ID1_MASK 0xFFFF #define MADERA_SW_RST_DEV_ID1_SHIFT 0 -#define MADERA_SW_RST_DEV_ID1_WIDTH 16 /* (0x0001) Hardware_Revision */ #define MADERA_HW_REVISION_MASK 0x00FF #define MADERA_HW_REVISION_SHIFT 0 -#define MADERA_HW_REVISION_WIDTH 8 /* (0x0020) Tone_Generator_1 */ #define MADERA_TONE2_ENA 0x0002 #define MADERA_TONE2_ENA_MASK 0x0002 #define MADERA_TONE2_ENA_SHIFT 1 -#define MADERA_TONE2_ENA_WIDTH 1 #define MADERA_TONE1_ENA 0x0001 #define MADERA_TONE1_ENA_MASK 0x0001 #define MADERA_TONE1_ENA_SHIFT 0 -#define MADERA_TONE1_ENA_WIDTH 1 /* (0x0021) Tone_Generator_2 */ #define MADERA_TONE1_LVL_0_MASK 0xFFFF #define MADERA_TONE1_LVL_0_SHIFT 0 -#define MADERA_TONE1_LVL_0_WIDTH 16 /* (0x0022) Tone_Generator_3 */ #define MADERA_TONE1_LVL_MASK 0x00FF #define MADERA_TONE1_LVL_SHIFT 0 -#define MADERA_TONE1_LVL_WIDTH 8 /* (0x0023) Tone_Generator_4 */ #define MADERA_TONE2_LVL_0_MASK 0xFFFF #define MADERA_TONE2_LVL_0_SHIFT 0 -#define MADERA_TONE2_LVL_0_WIDTH 16 /* (0x0024) Tone_Generator_5 */ #define MADERA_TONE2_LVL_MASK 0x00FF #define MADERA_TONE2_LVL_SHIFT 0 -#define MADERA_TONE2_LVL_WIDTH 8 /* (0x0030) PWM_Drive_1 */ #define MADERA_PWM2_ENA 0x0002 #define MADERA_PWM2_ENA_MASK 0x0002 #define MADERA_PWM2_ENA_SHIFT 1 -#define MADERA_PWM2_ENA_WIDTH 1 #define MADERA_PWM1_ENA 0x0001 #define MADERA_PWM1_ENA_MASK 0x0001 #define MADERA_PWM1_ENA_SHIFT 0 -#define MADERA_PWM1_ENA_WIDTH 1 /* (0x00A0) Comfort_Noise_Generator */ #define MADERA_NOISE_GEN_ENA 0x0020 #define MADERA_NOISE_GEN_ENA_MASK 0x0020 #define MADERA_NOISE_GEN_ENA_SHIFT 5 -#define MADERA_NOISE_GEN_ENA_WIDTH 1 #define MADERA_NOISE_GEN_GAIN_MASK 0x001F #define MADERA_NOISE_GEN_GAIN_SHIFT 0 -#define MADERA_NOISE_GEN_GAIN_WIDTH 5 /* (0x0100) Clock_32k_1 */ #define MADERA_CLK_32K_ENA 0x0040 #define MADERA_CLK_32K_ENA_MASK 0x0040 #define MADERA_CLK_32K_ENA_SHIFT 6 -#define MADERA_CLK_32K_ENA_WIDTH 1 #define MADERA_CLK_32K_SRC_MASK 0x0003 #define MADERA_CLK_32K_SRC_SHIFT 0 -#define MADERA_CLK_32K_SRC_WIDTH 2 /* (0x0101) System_Clock_1 */ #define MADERA_SYSCLK_FRAC 0x8000 #define MADERA_SYSCLK_FRAC_MASK 0x8000 #define MADERA_SYSCLK_FRAC_SHIFT 15 -#define MADERA_SYSCLK_FRAC_WIDTH 1 #define MADERA_SYSCLK_FREQ_MASK 0x0700 #define MADERA_SYSCLK_FREQ_SHIFT 8 -#define MADERA_SYSCLK_FREQ_WIDTH 3 #define MADERA_SYSCLK_ENA 0x0040 #define MADERA_SYSCLK_ENA_MASK 0x0040 #define MADERA_SYSCLK_ENA_SHIFT 6 -#define MADERA_SYSCLK_ENA_WIDTH 1 #define MADERA_SYSCLK_SRC_MASK 0x000F #define MADERA_SYSCLK_SRC_SHIFT 0 -#define MADERA_SYSCLK_SRC_WIDTH 4 /* (0x0102) Sample_rate_1 */ #define MADERA_SAMPLE_RATE_1_MASK 0x001F #define MADERA_SAMPLE_RATE_1_SHIFT 0 -#define MADERA_SAMPLE_RATE_1_WIDTH 5 /* (0x0103) Sample_rate_2 */ #define MADERA_SAMPLE_RATE_2_MASK 0x001F #define MADERA_SAMPLE_RATE_2_SHIFT 0 -#define MADERA_SAMPLE_RATE_2_WIDTH 5 /* (0x0104) Sample_rate_3 */ #define MADERA_SAMPLE_RATE_3_MASK 0x001F #define MADERA_SAMPLE_RATE_3_SHIFT 0 -#define MADERA_SAMPLE_RATE_3_WIDTH 5 /* (0x0112) Async_clock_1 */ #define MADERA_ASYNC_CLK_FREQ_MASK 0x0700 #define MADERA_ASYNC_CLK_FREQ_SHIFT 8 -#define MADERA_ASYNC_CLK_FREQ_WIDTH 3 #define MADERA_ASYNC_CLK_ENA 0x0040 #define MADERA_ASYNC_CLK_ENA_MASK 0x0040 #define MADERA_ASYNC_CLK_ENA_SHIFT 6 -#define MADERA_ASYNC_CLK_ENA_WIDTH 1 #define MADERA_ASYNC_CLK_SRC_MASK 0x000F #define MADERA_ASYNC_CLK_SRC_SHIFT 0 -#define MADERA_ASYNC_CLK_SRC_WIDTH 4 /* (0x0113) Async_sample_rate_1 */ #define MADERA_ASYNC_SAMPLE_RATE_1_MASK 0x001F #define MADERA_ASYNC_SAMPLE_RATE_1_SHIFT 0 -#define MADERA_ASYNC_SAMPLE_RATE_1_WIDTH 5 /* (0x0114) Async_sample_rate_2 */ #define MADERA_ASYNC_SAMPLE_RATE_2_MASK 0x001F #define MADERA_ASYNC_SAMPLE_RATE_2_SHIFT 0 -#define MADERA_ASYNC_SAMPLE_RATE_2_WIDTH 5 /* (0x0120) DSP_Clock_1 */ #define MADERA_DSP_CLK_FREQ_LEGACY 0x0700 #define MADERA_DSP_CLK_FREQ_LEGACY_MASK 0x0700 #define MADERA_DSP_CLK_FREQ_LEGACY_SHIFT 8 -#define MADERA_DSP_CLK_FREQ_LEGACY_WIDTH 3 #define MADERA_DSP_CLK_ENA 0x0040 #define MADERA_DSP_CLK_ENA_MASK 0x0040 #define MADERA_DSP_CLK_ENA_SHIFT 6 -#define MADERA_DSP_CLK_ENA_WIDTH 1 #define MADERA_DSP_CLK_SRC 0x000F #define MADERA_DSP_CLK_SRC_MASK 0x000F #define MADERA_DSP_CLK_SRC_SHIFT 0 -#define MADERA_DSP_CLK_SRC_WIDTH 4 /* (0x0122) DSP_Clock_2 */ #define MADERA_DSP_CLK_FREQ_MASK 0x03FF #define MADERA_DSP_CLK_FREQ_SHIFT 0 -#define MADERA_DSP_CLK_FREQ_WIDTH 10 /* (0x0149) Output_system_clock */ #define MADERA_OPCLK_ENA 0x8000 #define MADERA_OPCLK_ENA_MASK 0x8000 #define MADERA_OPCLK_ENA_SHIFT 15 -#define MADERA_OPCLK_ENA_WIDTH 1 #define MADERA_OPCLK_DIV_MASK 0x00F8 #define MADERA_OPCLK_DIV_SHIFT 3 -#define MADERA_OPCLK_DIV_WIDTH 5 #define MADERA_OPCLK_SEL_MASK 0x0007 #define MADERA_OPCLK_SEL_SHIFT 0 -#define MADERA_OPCLK_SEL_WIDTH 3 /* (0x014A) Output_async_clock */ #define MADERA_OPCLK_ASYNC_ENA 0x8000 #define MADERA_OPCLK_ASYNC_ENA_MASK 0x8000 #define MADERA_OPCLK_ASYNC_ENA_SHIFT 15 -#define MADERA_OPCLK_ASYNC_ENA_WIDTH 1 #define MADERA_OPCLK_ASYNC_DIV_MASK 0x00F8 #define MADERA_OPCLK_ASYNC_DIV_SHIFT 3 -#define MADERA_OPCLK_ASYNC_DIV_WIDTH 5 #define MADERA_OPCLK_ASYNC_SEL_MASK 0x0007 #define MADERA_OPCLK_ASYNC_SEL_SHIFT 0 -#define MADERA_OPCLK_ASYNC_SEL_WIDTH 3 /* (0x0171) FLL1_Control_1 */ #define CS47L92_FLL1_REFCLK_SRC_MASK 0xF000 #define CS47L92_FLL1_REFCLK_SRC_SHIFT 12 -#define CS47L92_FLL1_REFCLK_SRC_WIDTH 4 #define MADERA_FLL1_HOLD_MASK 0x0004 #define MADERA_FLL1_HOLD_SHIFT 2 -#define MADERA_FLL1_HOLD_WIDTH 1 #define MADERA_FLL1_FREERUN 0x0002 #define MADERA_FLL1_FREERUN_MASK 0x0002 #define MADERA_FLL1_FREERUN_SHIFT 1 -#define MADERA_FLL1_FREERUN_WIDTH 1 #define MADERA_FLL1_ENA 0x0001 #define MADERA_FLL1_ENA_MASK 0x0001 #define MADERA_FLL1_ENA_SHIFT 0 -#define MADERA_FLL1_ENA_WIDTH 1 /* (0x0172) FLL1_Control_2 */ #define MADERA_FLL1_CTRL_UPD 0x8000 #define MADERA_FLL1_CTRL_UPD_MASK 0x8000 #define MADERA_FLL1_CTRL_UPD_SHIFT 15 -#define MADERA_FLL1_CTRL_UPD_WIDTH 1 #define MADERA_FLL1_N_MASK 0x03FF #define MADERA_FLL1_N_SHIFT 0 -#define MADERA_FLL1_N_WIDTH 10 /* (0x0173) FLL1_Control_3 */ #define MADERA_FLL1_THETA_MASK 0xFFFF #define MADERA_FLL1_THETA_SHIFT 0 -#define MADERA_FLL1_THETA_WIDTH 16 /* (0x0174) FLL1_Control_4 */ #define MADERA_FLL1_LAMBDA_MASK 0xFFFF #define MADERA_FLL1_LAMBDA_SHIFT 0 -#define MADERA_FLL1_LAMBDA_WIDTH 16 /* (0x0175) FLL1_Control_5 */ #define MADERA_FLL1_FRATIO_MASK 0x0F00 #define MADERA_FLL1_FRATIO_SHIFT 8 -#define MADERA_FLL1_FRATIO_WIDTH 4 #define MADERA_FLL1_FB_DIV_MASK 0x03FF #define MADERA_FLL1_FB_DIV_SHIFT 0 -#define MADERA_FLL1_FB_DIV_WIDTH 10 /* (0x0176) FLL1_Control_6 */ #define MADERA_FLL1_REFCLK_DIV_MASK 0x00C0 #define MADERA_FLL1_REFCLK_DIV_SHIFT 6 -#define MADERA_FLL1_REFCLK_DIV_WIDTH 2 #define MADERA_FLL1_REFCLK_SRC_MASK 0x000F #define MADERA_FLL1_REFCLK_SRC_SHIFT 0 -#define MADERA_FLL1_REFCLK_SRC_WIDTH 4 /* (0x0179) FLL1_Control_7 */ #define MADERA_FLL1_GAIN_MASK 0x003c #define MADERA_FLL1_GAIN_SHIFT 2 -#define MADERA_FLL1_GAIN_WIDTH 4 /* (0x017A) FLL1_EFS_2 */ #define MADERA_FLL1_PHASE_GAIN_MASK 0xF000 #define MADERA_FLL1_PHASE_GAIN_SHIFT 12 -#define MADERA_FLL1_PHASE_GAIN_WIDTH 4 #define MADERA_FLL1_PHASE_ENA_MASK 0x0800 #define MADERA_FLL1_PHASE_ENA_SHIFT 11 -#define MADERA_FLL1_PHASE_ENA_WIDTH 1 /* (0x017A) FLL1_Control_10 */ #define MADERA_FLL1_HP_MASK 0xC000 #define MADERA_FLL1_HP_SHIFT 14 -#define MADERA_FLL1_HP_WIDTH 2 #define MADERA_FLL1_PHASEDET_ENA_MASK 0x1000 #define MADERA_FLL1_PHASEDET_ENA_SHIFT 12 -#define MADERA_FLL1_PHASEDET_ENA_WIDTH 1 /* (0x017B) FLL1_Control_11 */ #define MADERA_FLL1_LOCKDET_THR_MASK 0x001E #define MADERA_FLL1_LOCKDET_THR_SHIFT 1 -#define MADERA_FLL1_LOCKDET_THR_WIDTH 4 #define MADERA_FLL1_LOCKDET_MASK 0x0001 #define MADERA_FLL1_LOCKDET_SHIFT 0 -#define MADERA_FLL1_LOCKDET_WIDTH 1 /* (0x017D) FLL1_Digital_Test_1 */ #define MADERA_FLL1_SYNC_EFS_ENA_MASK 0x0100 #define MADERA_FLL1_SYNC_EFS_ENA_SHIFT 8 -#define MADERA_FLL1_SYNC_EFS_ENA_WIDTH 1 #define MADERA_FLL1_CLK_VCO_FAST_SRC_MASK 0x0003 #define MADERA_FLL1_CLK_VCO_FAST_SRC_SHIFT 0 -#define MADERA_FLL1_CLK_VCO_FAST_SRC_WIDTH 2 /* (0x0181) FLL1_Synchroniser_1 */ #define MADERA_FLL1_SYNC_ENA 0x0001 #define MADERA_FLL1_SYNC_ENA_MASK 0x0001 #define MADERA_FLL1_SYNC_ENA_SHIFT 0 -#define MADERA_FLL1_SYNC_ENA_WIDTH 1 /* (0x0182) FLL1_Synchroniser_2 */ #define MADERA_FLL1_SYNC_N_MASK 0x03FF #define MADERA_FLL1_SYNC_N_SHIFT 0 -#define MADERA_FLL1_SYNC_N_WIDTH 10 /* (0x0183) FLL1_Synchroniser_3 */ #define MADERA_FLL1_SYNC_THETA_MASK 0xFFFF #define MADERA_FLL1_SYNC_THETA_SHIFT 0 -#define MADERA_FLL1_SYNC_THETA_WIDTH 16 /* (0x0184) FLL1_Synchroniser_4 */ #define MADERA_FLL1_SYNC_LAMBDA_MASK 0xFFFF #define MADERA_FLL1_SYNC_LAMBDA_SHIFT 0 -#define MADERA_FLL1_SYNC_LAMBDA_WIDTH 16 /* (0x0185) FLL1_Synchroniser_5 */ #define MADERA_FLL1_SYNC_FRATIO_MASK 0x0700 #define MADERA_FLL1_SYNC_FRATIO_SHIFT 8 -#define MADERA_FLL1_SYNC_FRATIO_WIDTH 3 /* (0x0186) FLL1_Synchroniser_6 */ #define MADERA_FLL1_SYNCCLK_DIV_MASK 0x00C0 #define MADERA_FLL1_SYNCCLK_DIV_SHIFT 6 -#define MADERA_FLL1_SYNCCLK_DIV_WIDTH 2 #define MADERA_FLL1_SYNCCLK_SRC_MASK 0x000F #define MADERA_FLL1_SYNCCLK_SRC_SHIFT 0 -#define MADERA_FLL1_SYNCCLK_SRC_WIDTH 4 /* (0x0187) FLL1_Synchroniser_7 */ #define MADERA_FLL1_SYNC_GAIN_MASK 0x003c #define MADERA_FLL1_SYNC_GAIN_SHIFT 2 -#define MADERA_FLL1_SYNC_GAIN_WIDTH 4 #define MADERA_FLL1_SYNC_DFSAT 0x0001 #define MADERA_FLL1_SYNC_DFSAT_MASK 0x0001 #define MADERA_FLL1_SYNC_DFSAT_SHIFT 0 -#define MADERA_FLL1_SYNC_DFSAT_WIDTH 1 /* (0x01D1) FLL_AO_Control_1 */ #define MADERA_FLL_AO_HOLD 0x0004 #define MADERA_FLL_AO_HOLD_MASK 0x0004 #define MADERA_FLL_AO_HOLD_SHIFT 2 -#define MADERA_FLL_AO_HOLD_WIDTH 1 #define MADERA_FLL_AO_FREERUN 0x0002 #define MADERA_FLL_AO_FREERUN_MASK 0x0002 #define MADERA_FLL_AO_FREERUN_SHIFT 1 -#define MADERA_FLL_AO_FREERUN_WIDTH 1 #define MADERA_FLL_AO_ENA 0x0001 #define MADERA_FLL_AO_ENA_MASK 0x0001 #define MADERA_FLL_AO_ENA_SHIFT 0 -#define MADERA_FLL_AO_ENA_WIDTH 1 /* (0x01D2) FLL_AO_Control_2 */ #define MADERA_FLL_AO_CTRL_UPD 0x8000 #define MADERA_FLL_AO_CTRL_UPD_MASK 0x8000 #define MADERA_FLL_AO_CTRL_UPD_SHIFT 15 -#define MADERA_FLL_AO_CTRL_UPD_WIDTH 1 /* (0x01D6) FLL_AO_Control_6 */ #define MADERA_FLL_AO_REFCLK_SRC_MASK 0x000F #define MADERA_FLL_AO_REFCLK_SRC_SHIFT 0 -#define MADERA_FLL_AO_REFCLK_SRC_WIDTH 4 /* (0x0200) Mic_Charge_Pump_1 */ #define MADERA_CPMIC_BYPASS 0x0002 #define MADERA_CPMIC_BYPASS_MASK 0x0002 #define MADERA_CPMIC_BYPASS_SHIFT 1 -#define MADERA_CPMIC_BYPASS_WIDTH 1 #define MADERA_CPMIC_ENA 0x0001 #define MADERA_CPMIC_ENA_MASK 0x0001 #define MADERA_CPMIC_ENA_SHIFT 0 -#define MADERA_CPMIC_ENA_WIDTH 1 /* (0x0210) LDO1_Control_1 */ #define MADERA_LDO1_VSEL_MASK 0x07E0 #define MADERA_LDO1_VSEL_SHIFT 5 -#define MADERA_LDO1_VSEL_WIDTH 6 #define MADERA_LDO1_FAST 0x0010 #define MADERA_LDO1_FAST_MASK 0x0010 #define MADERA_LDO1_FAST_SHIFT 4 -#define MADERA_LDO1_FAST_WIDTH 1 #define MADERA_LDO1_DISCH 0x0004 #define MADERA_LDO1_DISCH_MASK 0x0004 #define MADERA_LDO1_DISCH_SHIFT 2 -#define MADERA_LDO1_DISCH_WIDTH 1 #define MADERA_LDO1_BYPASS 0x0002 #define MADERA_LDO1_BYPASS_MASK 0x0002 #define MADERA_LDO1_BYPASS_SHIFT 1 -#define MADERA_LDO1_BYPASS_WIDTH 1 #define MADERA_LDO1_ENA 0x0001 #define MADERA_LDO1_ENA_MASK 0x0001 #define MADERA_LDO1_ENA_SHIFT 0 -#define MADERA_LDO1_ENA_WIDTH 1 /* (0x0213) LDO2_Control_1 */ #define MADERA_LDO2_VSEL_MASK 0x07E0 #define MADERA_LDO2_VSEL_SHIFT 5 -#define MADERA_LDO2_VSEL_WIDTH 6 #define MADERA_LDO2_FAST 0x0010 #define MADERA_LDO2_FAST_MASK 0x0010 #define MADERA_LDO2_FAST_SHIFT 4 -#define MADERA_LDO2_FAST_WIDTH 1 #define MADERA_LDO2_DISCH 0x0004 #define MADERA_LDO2_DISCH_MASK 0x0004 #define MADERA_LDO2_DISCH_SHIFT 2 -#define MADERA_LDO2_DISCH_WIDTH 1 #define MADERA_LDO2_BYPASS 0x0002 #define MADERA_LDO2_BYPASS_MASK 0x0002 #define MADERA_LDO2_BYPASS_SHIFT 1 -#define MADERA_LDO2_BYPASS_WIDTH 1 #define MADERA_LDO2_ENA 0x0001 #define MADERA_LDO2_ENA_MASK 0x0001 #define MADERA_LDO2_ENA_SHIFT 0 -#define MADERA_LDO2_ENA_WIDTH 1 /* (0x0218) Mic_Bias_Ctrl_1 */ #define MADERA_MICB1_EXT_CAP 0x8000 #define MADERA_MICB1_EXT_CAP_MASK 0x8000 #define MADERA_MICB1_EXT_CAP_SHIFT 15 -#define MADERA_MICB1_EXT_CAP_WIDTH 1 #define MADERA_MICB1_LVL_MASK 0x01E0 #define MADERA_MICB1_LVL_SHIFT 5 -#define MADERA_MICB1_LVL_WIDTH 4 #define MADERA_MICB1_ENA 0x0001 #define MADERA_MICB1_ENA_MASK 0x0001 #define MADERA_MICB1_ENA_SHIFT 0 -#define MADERA_MICB1_ENA_WIDTH 1 /* (0x021C) Mic_Bias_Ctrl_5 */ #define MADERA_MICB1D_ENA 0x1000 #define MADERA_MICB1D_ENA_MASK 0x1000 #define MADERA_MICB1D_ENA_SHIFT 12 -#define MADERA_MICB1D_ENA_WIDTH 1 #define MADERA_MICB1C_ENA 0x0100 #define MADERA_MICB1C_ENA_MASK 0x0100 #define MADERA_MICB1C_ENA_SHIFT 8 -#define MADERA_MICB1C_ENA_WIDTH 1 #define MADERA_MICB1B_ENA 0x0010 #define MADERA_MICB1B_ENA_MASK 0x0010 #define MADERA_MICB1B_ENA_SHIFT 4 -#define MADERA_MICB1B_ENA_WIDTH 1 #define MADERA_MICB1A_ENA 0x0001 #define MADERA_MICB1A_ENA_MASK 0x0001 #define MADERA_MICB1A_ENA_SHIFT 0 -#define MADERA_MICB1A_ENA_WIDTH 1 /* (0x021E) Mic_Bias_Ctrl_6 */ #define MADERA_MICB2D_ENA 0x1000 #define MADERA_MICB2D_ENA_MASK 0x1000 #define MADERA_MICB2D_ENA_SHIFT 12 -#define MADERA_MICB2D_ENA_WIDTH 1 #define MADERA_MICB2C_ENA 0x0100 #define MADERA_MICB2C_ENA_MASK 0x0100 #define MADERA_MICB2C_ENA_SHIFT 8 -#define MADERA_MICB2C_ENA_WIDTH 1 #define MADERA_MICB2B_ENA 0x0010 #define MADERA_MICB2B_ENA_MASK 0x0010 #define MADERA_MICB2B_ENA_SHIFT 4 -#define MADERA_MICB2B_ENA_WIDTH 1 #define MADERA_MICB2A_ENA 0x0001 #define MADERA_MICB2A_ENA_MASK 0x0001 #define MADERA_MICB2A_ENA_SHIFT 0 -#define MADERA_MICB2A_ENA_WIDTH 1 /* (0x0225) - HP Ctrl 1L */ #define MADERA_RMV_SHRT_HP1L 0x4000 #define MADERA_RMV_SHRT_HP1L_MASK 0x4000 #define MADERA_RMV_SHRT_HP1L_SHIFT 14 -#define MADERA_RMV_SHRT_HP1L_WIDTH 1 #define MADERA_HP1L_FLWR 0x0004 #define MADERA_HP1L_FLWR_MASK 0x0004 #define MADERA_HP1L_FLWR_SHIFT 2 -#define MADERA_HP1L_FLWR_WIDTH 1 #define MADERA_HP1L_SHRTI 0x0002 #define MADERA_HP1L_SHRTI_MASK 0x0002 #define MADERA_HP1L_SHRTI_SHIFT 1 -#define MADERA_HP1L_SHRTI_WIDTH 1 #define MADERA_HP1L_SHRTO 0x0001 #define MADERA_HP1L_SHRTO_MASK 0x0001 #define MADERA_HP1L_SHRTO_SHIFT 0 -#define MADERA_HP1L_SHRTO_WIDTH 1 /* (0x0226) - HP Ctrl 1R */ #define MADERA_RMV_SHRT_HP1R 0x4000 #define MADERA_RMV_SHRT_HP1R_MASK 0x4000 #define MADERA_RMV_SHRT_HP1R_SHIFT 14 -#define MADERA_RMV_SHRT_HP1R_WIDTH 1 #define MADERA_HP1R_FLWR 0x0004 #define MADERA_HP1R_FLWR_MASK 0x0004 #define MADERA_HP1R_FLWR_SHIFT 2 -#define MADERA_HP1R_FLWR_WIDTH 1 #define MADERA_HP1R_SHRTI 0x0002 #define MADERA_HP1R_SHRTI_MASK 0x0002 #define MADERA_HP1R_SHRTI_SHIFT 1 -#define MADERA_HP1R_SHRTI_WIDTH 1 #define MADERA_HP1R_SHRTO 0x0001 #define MADERA_HP1R_SHRTO_MASK 0x0001 #define MADERA_HP1R_SHRTO_SHIFT 0 -#define MADERA_HP1R_SHRTO_WIDTH 1 /* (0x0293) Accessory_Detect_Mode_1 */ #define MADERA_ACCDET_SRC 0x2000 #define MADERA_ACCDET_SRC_MASK 0x2000 #define MADERA_ACCDET_SRC_SHIFT 13 -#define MADERA_ACCDET_SRC_WIDTH 1 #define MADERA_ACCDET_POLARITY_INV_ENA 0x0080 #define MADERA_ACCDET_POLARITY_INV_ENA_MASK 0x0080 #define MADERA_ACCDET_POLARITY_INV_ENA_SHIFT 7 -#define MADERA_ACCDET_POLARITY_INV_ENA_WIDTH 1 #define MADERA_ACCDET_MODE_MASK 0x0007 #define MADERA_ACCDET_MODE_SHIFT 0 -#define MADERA_ACCDET_MODE_WIDTH 3 /* (0x0299) Headphone_Detect_0 */ #define MADERA_HPD_GND_SEL 0x0007 #define MADERA_HPD_GND_SEL_MASK 0x0007 #define MADERA_HPD_GND_SEL_SHIFT 0 -#define MADERA_HPD_GND_SEL_WIDTH 3 #define MADERA_HPD_SENSE_SEL 0x00F0 #define MADERA_HPD_SENSE_SEL_MASK 0x00F0 #define MADERA_HPD_SENSE_SEL_SHIFT 4 -#define MADERA_HPD_SENSE_SEL_WIDTH 4 #define MADERA_HPD_FRC_SEL 0x0F00 #define MADERA_HPD_FRC_SEL_MASK 0x0F00 #define MADERA_HPD_FRC_SEL_SHIFT 8 -#define MADERA_HPD_FRC_SEL_WIDTH 4 #define MADERA_HPD_OUT_SEL 0x7000 #define MADERA_HPD_OUT_SEL_MASK 0x7000 #define MADERA_HPD_OUT_SEL_SHIFT 12 -#define MADERA_HPD_OUT_SEL_WIDTH 3 #define MADERA_HPD_OVD_ENA_SEL 0x8000 #define MADERA_HPD_OVD_ENA_SEL_MASK 0x8000 #define MADERA_HPD_OVD_ENA_SEL_SHIFT 15 -#define MADERA_HPD_OVD_ENA_SEL_WIDTH 1 /* (0x029B) Headphone_Detect_1 */ #define MADERA_HP_IMPEDANCE_RANGE_MASK 0x0600 #define MADERA_HP_IMPEDANCE_RANGE_SHIFT 9 -#define MADERA_HP_IMPEDANCE_RANGE_WIDTH 2 #define MADERA_HP_STEP_SIZE 0x0100 #define MADERA_HP_STEP_SIZE_MASK 0x0100 #define MADERA_HP_STEP_SIZE_SHIFT 8 -#define MADERA_HP_STEP_SIZE_WIDTH 1 #define MADERA_HP_CLK_DIV_MASK 0x0018 #define MADERA_HP_CLK_DIV_SHIFT 3 -#define MADERA_HP_CLK_DIV_WIDTH 2 #define MADERA_HP_RATE_MASK 0x0006 #define MADERA_HP_RATE_SHIFT 1 -#define MADERA_HP_RATE_WIDTH 2 #define MADERA_HP_POLL 0x0001 #define MADERA_HP_POLL_MASK 0x0001 #define MADERA_HP_POLL_SHIFT 0 -#define MADERA_HP_POLL_WIDTH 1 /* (0x029C) Headphone_Detect_2 */ #define MADERA_HP_DONE_MASK 0x8000 #define MADERA_HP_DONE_SHIFT 15 -#define MADERA_HP_DONE_WIDTH 1 #define MADERA_HP_LVL_MASK 0x7FFF #define MADERA_HP_LVL_SHIFT 0 -#define MADERA_HP_LVL_WIDTH 15 /* (0x029D) Headphone_Detect_3 */ #define MADERA_HP_DACVAL_MASK 0x03FF #define MADERA_HP_DACVAL_SHIFT 0 -#define MADERA_HP_DACVAL_WIDTH 10 /* (0x029F) - Headphone Detect 5 */ #define MADERA_HP_DACVAL_DOWN_MASK 0x03FF #define MADERA_HP_DACVAL_DOWN_SHIFT 0 -#define MADERA_HP_DACVAL_DOWN_WIDTH 10 /* (0x02A2) Mic_Detect_1_Control_0 */ #define MADERA_MICD1_GND_MASK 0x0007 #define MADERA_MICD1_GND_SHIFT 0 -#define MADERA_MICD1_GND_WIDTH 3 #define MADERA_MICD1_SENSE_MASK 0x00F0 #define MADERA_MICD1_SENSE_SHIFT 4 -#define MADERA_MICD1_SENSE_WIDTH 4 #define MADERA_MICD1_ADC_MODE_MASK 0x8000 #define MADERA_MICD1_ADC_MODE_SHIFT 15 -#define MADERA_MICD1_ADC_MODE_WIDTH 1 /* (0x02A3) Mic_Detect_1_Control_1 */ #define MADERA_MICD_BIAS_STARTTIME_MASK 0xF000 #define MADERA_MICD_BIAS_STARTTIME_SHIFT 12 -#define MADERA_MICD_BIAS_STARTTIME_WIDTH 4 #define MADERA_MICD_RATE_MASK 0x0F00 #define MADERA_MICD_RATE_SHIFT 8 -#define MADERA_MICD_RATE_WIDTH 4 #define MADERA_MICD_BIAS_SRC_MASK 0x00F0 #define MADERA_MICD_BIAS_SRC_SHIFT 4 -#define MADERA_MICD_BIAS_SRC_WIDTH 4 #define MADERA_MICD_DBTIME 0x0002 #define MADERA_MICD_DBTIME_MASK 0x0002 #define MADERA_MICD_DBTIME_SHIFT 1 -#define MADERA_MICD_DBTIME_WIDTH 1 #define MADERA_MICD_ENA 0x0001 #define MADERA_MICD_ENA_MASK 0x0001 #define MADERA_MICD_ENA_SHIFT 0 -#define MADERA_MICD_ENA_WIDTH 1 /* (0x02A4) Mic_Detect_1_Control_2 */ #define MADERA_MICD_LVL_SEL_MASK 0x00FF #define MADERA_MICD_LVL_SEL_SHIFT 0 -#define MADERA_MICD_LVL_SEL_WIDTH 8 /* (0x02A5) Mic_Detect_1_Control_3 */ #define MADERA_MICD_LVL_0 0x0004 @@ -1859,1746 +1731,1341 @@ #define MADERA_MICD_LVL_8 0x0400 #define MADERA_MICD_LVL_MASK 0x07FC #define MADERA_MICD_LVL_SHIFT 2 -#define MADERA_MICD_LVL_WIDTH 9 #define MADERA_MICD_VALID 0x0002 #define MADERA_MICD_VALID_MASK 0x0002 #define MADERA_MICD_VALID_SHIFT 1 -#define MADERA_MICD_VALID_WIDTH 1 #define MADERA_MICD_STS 0x0001 #define MADERA_MICD_STS_MASK 0x0001 #define MADERA_MICD_STS_SHIFT 0 -#define MADERA_MICD_STS_WIDTH 1 /* (0x02AB) Mic_Detect_1_Control_4 */ #define MADERA_MICDET_ADCVAL_DIFF_MASK 0xFF00 #define MADERA_MICDET_ADCVAL_DIFF_SHIFT 8 -#define MADERA_MICDET_ADCVAL_DIFF_WIDTH 8 #define MADERA_MICDET_ADCVAL_MASK 0x007F #define MADERA_MICDET_ADCVAL_SHIFT 0 -#define MADERA_MICDET_ADCVAL_WIDTH 7 /* (0x02C6) Micd_Clamp_control */ #define MADERA_MICD_CLAMP_OVD 0x0010 #define MADERA_MICD_CLAMP_OVD_MASK 0x0010 #define MADERA_MICD_CLAMP_OVD_SHIFT 4 -#define MADERA_MICD_CLAMP_OVD_WIDTH 1 #define MADERA_MICD_CLAMP_MODE_MASK 0x000F #define MADERA_MICD_CLAMP_MODE_SHIFT 0 -#define MADERA_MICD_CLAMP_MODE_WIDTH 4 /* (0x02C8) GP_Switch_1 */ #define MADERA_SW2_MODE_MASK 0x000C #define MADERA_SW2_MODE_SHIFT 2 -#define MADERA_SW2_MODE_WIDTH 2 #define MADERA_SW1_MODE_MASK 0x0003 #define MADERA_SW1_MODE_SHIFT 0 -#define MADERA_SW1_MODE_WIDTH 2 /* (0x02D3) Jack_detect_analogue */ #define MADERA_JD2_ENA 0x0002 #define MADERA_JD2_ENA_MASK 0x0002 #define MADERA_JD2_ENA_SHIFT 1 -#define MADERA_JD2_ENA_WIDTH 1 #define MADERA_JD1_ENA 0x0001 #define MADERA_JD1_ENA_MASK 0x0001 #define MADERA_JD1_ENA_SHIFT 0 -#define MADERA_JD1_ENA_WIDTH 1 /* (0x0300) Input_Enables */ #define MADERA_IN6L_ENA 0x0800 #define MADERA_IN6L_ENA_MASK 0x0800 #define MADERA_IN6L_ENA_SHIFT 11 -#define MADERA_IN6L_ENA_WIDTH 1 #define MADERA_IN6R_ENA 0x0400 #define MADERA_IN6R_ENA_MASK 0x0400 #define MADERA_IN6R_ENA_SHIFT 10 -#define MADERA_IN6R_ENA_WIDTH 1 #define MADERA_IN5L_ENA 0x0200 #define MADERA_IN5L_ENA_MASK 0x0200 #define MADERA_IN5L_ENA_SHIFT 9 -#define MADERA_IN5L_ENA_WIDTH 1 #define MADERA_IN5R_ENA 0x0100 #define MADERA_IN5R_ENA_MASK 0x0100 #define MADERA_IN5R_ENA_SHIFT 8 -#define MADERA_IN5R_ENA_WIDTH 1 #define MADERA_IN4L_ENA 0x0080 #define MADERA_IN4L_ENA_MASK 0x0080 #define MADERA_IN4L_ENA_SHIFT 7 -#define MADERA_IN4L_ENA_WIDTH 1 #define MADERA_IN4R_ENA 0x0040 #define MADERA_IN4R_ENA_MASK 0x0040 #define MADERA_IN4R_ENA_SHIFT 6 -#define MADERA_IN4R_ENA_WIDTH 1 #define MADERA_IN3L_ENA 0x0020 #define MADERA_IN3L_ENA_MASK 0x0020 #define MADERA_IN3L_ENA_SHIFT 5 -#define MADERA_IN3L_ENA_WIDTH 1 #define MADERA_IN3R_ENA 0x0010 #define MADERA_IN3R_ENA_MASK 0x0010 #define MADERA_IN3R_ENA_SHIFT 4 -#define MADERA_IN3R_ENA_WIDTH 1 #define MADERA_IN2L_ENA 0x0008 #define MADERA_IN2L_ENA_MASK 0x0008 #define MADERA_IN2L_ENA_SHIFT 3 -#define MADERA_IN2L_ENA_WIDTH 1 #define MADERA_IN2R_ENA 0x0004 #define MADERA_IN2R_ENA_MASK 0x0004 #define MADERA_IN2R_ENA_SHIFT 2 -#define MADERA_IN2R_ENA_WIDTH 1 #define MADERA_IN1L_ENA 0x0002 #define MADERA_IN1L_ENA_MASK 0x0002 #define MADERA_IN1L_ENA_SHIFT 1 -#define MADERA_IN1L_ENA_WIDTH 1 #define MADERA_IN1R_ENA 0x0001 #define MADERA_IN1R_ENA_MASK 0x0001 #define MADERA_IN1R_ENA_SHIFT 0 -#define MADERA_IN1R_ENA_WIDTH 1 /* (0x0308) Input_Rate */ #define MADERA_IN_RATE_MASK 0xF800 #define MADERA_IN_RATE_SHIFT 11 -#define MADERA_IN_RATE_WIDTH 5 #define MADERA_IN_MODE_MASK 0x0400 #define MADERA_IN_MODE_SHIFT 10 -#define MADERA_IN_MODE_WIDTH 1 /* (0x0309) Input_Volume_Ramp */ #define MADERA_IN_VD_RAMP_MASK 0x0070 #define MADERA_IN_VD_RAMP_SHIFT 4 -#define MADERA_IN_VD_RAMP_WIDTH 3 #define MADERA_IN_VI_RAMP_MASK 0x0007 #define MADERA_IN_VI_RAMP_SHIFT 0 -#define MADERA_IN_VI_RAMP_WIDTH 3 /* (0x030C) HPF_Control */ #define MADERA_IN_HPF_CUT_MASK 0x0007 #define MADERA_IN_HPF_CUT_SHIFT 0 -#define MADERA_IN_HPF_CUT_WIDTH 3 /* (0x0310) IN1L_Control */ #define MADERA_IN1L_HPF_MASK 0x8000 #define MADERA_IN1L_HPF_SHIFT 15 -#define MADERA_IN1L_HPF_WIDTH 1 #define MADERA_IN1_DMIC_SUP_MASK 0x1800 #define MADERA_IN1_DMIC_SUP_SHIFT 11 -#define MADERA_IN1_DMIC_SUP_WIDTH 2 #define MADERA_IN1_MODE_MASK 0x0400 #define MADERA_IN1_MODE_SHIFT 10 -#define MADERA_IN1_MODE_WIDTH 1 #define MADERA_IN1L_PGA_VOL_MASK 0x00FE #define MADERA_IN1L_PGA_VOL_SHIFT 1 -#define MADERA_IN1L_PGA_VOL_WIDTH 7 /* (0x0311) ADC_Digital_Volume_1L */ #define MADERA_IN1L_SRC_MASK 0x4000 #define MADERA_IN1L_SRC_SHIFT 14 -#define MADERA_IN1L_SRC_WIDTH 1 #define MADERA_IN1L_SRC_SE_MASK 0x2000 #define MADERA_IN1L_SRC_SE_SHIFT 13 -#define MADERA_IN1L_SRC_SE_WIDTH 1 #define MADERA_IN1L_LP_MODE 0x0800 #define MADERA_IN1L_LP_MODE_MASK 0x0800 #define MADERA_IN1L_LP_MODE_SHIFT 11 -#define MADERA_IN1L_LP_MODE_WIDTH 1 #define MADERA_IN_VU 0x0200 #define MADERA_IN_VU_MASK 0x0200 #define MADERA_IN_VU_SHIFT 9 -#define MADERA_IN_VU_WIDTH 1 #define MADERA_IN1L_MUTE 0x0100 #define MADERA_IN1L_MUTE_MASK 0x0100 #define MADERA_IN1L_MUTE_SHIFT 8 -#define MADERA_IN1L_MUTE_WIDTH 1 #define MADERA_IN1L_DIG_VOL_MASK 0x00FF #define MADERA_IN1L_DIG_VOL_SHIFT 0 -#define MADERA_IN1L_DIG_VOL_WIDTH 8 /* (0x0312) DMIC1L_Control */ #define MADERA_IN1_OSR_MASK 0x0700 #define MADERA_IN1_OSR_SHIFT 8 -#define MADERA_IN1_OSR_WIDTH 3 /* (0x0313) IN1L_Rate_Control */ #define MADERA_IN1L_RATE_MASK 0xF800 #define MADERA_IN1L_RATE_SHIFT 11 -#define MADERA_IN1L_RATE_WIDTH 5 /* (0x0314) IN1R_Control */ #define MADERA_IN1R_HPF_MASK 0x8000 #define MADERA_IN1R_HPF_SHIFT 15 -#define MADERA_IN1R_HPF_WIDTH 1 #define MADERA_IN1R_PGA_VOL_MASK 0x00FE #define MADERA_IN1R_PGA_VOL_SHIFT 1 -#define MADERA_IN1R_PGA_VOL_WIDTH 7 #define MADERA_IN1_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN1_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN1_DMICCLK_SRC_WIDTH 2 /* (0x0315) ADC_Digital_Volume_1R */ #define MADERA_IN1R_SRC_MASK 0x4000 #define MADERA_IN1R_SRC_SHIFT 14 -#define MADERA_IN1R_SRC_WIDTH 1 #define MADERA_IN1R_SRC_SE_MASK 0x2000 #define MADERA_IN1R_SRC_SE_SHIFT 13 -#define MADERA_IN1R_SRC_SE_WIDTH 1 #define MADERA_IN1R_LP_MODE 0x0800 #define MADERA_IN1R_LP_MODE_MASK 0x0800 #define MADERA_IN1R_LP_MODE_SHIFT 11 -#define MADERA_IN1R_LP_MODE_WIDTH 1 #define MADERA_IN1R_MUTE 0x0100 #define MADERA_IN1R_MUTE_MASK 0x0100 #define MADERA_IN1R_MUTE_SHIFT 8 -#define MADERA_IN1R_MUTE_WIDTH 1 #define MADERA_IN1R_DIG_VOL_MASK 0x00FF #define MADERA_IN1R_DIG_VOL_SHIFT 0 -#define MADERA_IN1R_DIG_VOL_WIDTH 8 /* (0x0317) IN1R_Rate_Control */ #define MADERA_IN1R_RATE_MASK 0xF800 #define MADERA_IN1R_RATE_SHIFT 11 -#define MADERA_IN1R_RATE_WIDTH 5 /* (0x0318) IN2L_Control */ #define MADERA_IN2L_HPF_MASK 0x8000 #define MADERA_IN2L_HPF_SHIFT 15 -#define MADERA_IN2L_HPF_WIDTH 1 #define MADERA_IN2_DMIC_SUP_MASK 0x1800 #define MADERA_IN2_DMIC_SUP_SHIFT 11 -#define MADERA_IN2_DMIC_SUP_WIDTH 2 #define MADERA_IN2_MODE_MASK 0x0400 #define MADERA_IN2_MODE_SHIFT 10 -#define MADERA_IN2_MODE_WIDTH 1 #define MADERA_IN2L_PGA_VOL_MASK 0x00FE #define MADERA_IN2L_PGA_VOL_SHIFT 1 -#define MADERA_IN2L_PGA_VOL_WIDTH 7 /* (0x0319) ADC_Digital_Volume_2L */ #define MADERA_IN2L_SRC_MASK 0x4000 #define MADERA_IN2L_SRC_SHIFT 14 -#define MADERA_IN2L_SRC_WIDTH 1 #define MADERA_IN2L_SRC_SE_MASK 0x2000 #define MADERA_IN2L_SRC_SE_SHIFT 13 -#define MADERA_IN2L_SRC_SE_WIDTH 1 #define MADERA_IN2L_LP_MODE 0x0800 #define MADERA_IN2L_LP_MODE_MASK 0x0800 #define MADERA_IN2L_LP_MODE_SHIFT 11 -#define MADERA_IN2L_LP_MODE_WIDTH 1 #define MADERA_IN2L_MUTE 0x0100 #define MADERA_IN2L_MUTE_MASK 0x0100 #define MADERA_IN2L_MUTE_SHIFT 8 -#define MADERA_IN2L_MUTE_WIDTH 1 #define MADERA_IN2L_DIG_VOL_MASK 0x00FF #define MADERA_IN2L_DIG_VOL_SHIFT 0 -#define MADERA_IN2L_DIG_VOL_WIDTH 8 /* (0x031A) DMIC2L_Control */ #define MADERA_IN2_OSR_MASK 0x0700 #define MADERA_IN2_OSR_SHIFT 8 -#define MADERA_IN2_OSR_WIDTH 3 /* (0x031C) IN2R_Control */ #define MADERA_IN2R_HPF_MASK 0x8000 #define MADERA_IN2R_HPF_SHIFT 15 -#define MADERA_IN2R_HPF_WIDTH 1 #define MADERA_IN2R_PGA_VOL_MASK 0x00FE #define MADERA_IN2R_PGA_VOL_SHIFT 1 -#define MADERA_IN2R_PGA_VOL_WIDTH 7 #define MADERA_IN2_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN2_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN2_DMICCLK_SRC_WIDTH 2 /* (0x031D) ADC_Digital_Volume_2R */ #define MADERA_IN2R_SRC_MASK 0x4000 #define MADERA_IN2R_SRC_SHIFT 14 -#define MADERA_IN2R_SRC_WIDTH 1 #define MADERA_IN2R_SRC_SE_MASK 0x2000 #define MADERA_IN2R_SRC_SE_SHIFT 13 -#define MADERA_IN2R_SRC_SE_WIDTH 1 #define MADERA_IN2R_LP_MODE 0x0800 #define MADERA_IN2R_LP_MODE_MASK 0x0800 #define MADERA_IN2R_LP_MODE_SHIFT 11 -#define MADERA_IN2R_LP_MODE_WIDTH 1 #define MADERA_IN2R_MUTE 0x0100 #define MADERA_IN2R_MUTE_MASK 0x0100 #define MADERA_IN2R_MUTE_SHIFT 8 -#define MADERA_IN2R_MUTE_WIDTH 1 #define MADERA_IN2R_DIG_VOL_MASK 0x00FF #define MADERA_IN2R_DIG_VOL_SHIFT 0 -#define MADERA_IN2R_DIG_VOL_WIDTH 8 /* (0x0320) IN3L_Control */ #define MADERA_IN3L_HPF_MASK 0x8000 #define MADERA_IN3L_HPF_SHIFT 15 -#define MADERA_IN3L_HPF_WIDTH 1 #define MADERA_IN3_DMIC_SUP_MASK 0x1800 #define MADERA_IN3_DMIC_SUP_SHIFT 11 -#define MADERA_IN3_DMIC_SUP_WIDTH 2 #define MADERA_IN3_MODE_MASK 0x0400 #define MADERA_IN3_MODE_SHIFT 10 -#define MADERA_IN3_MODE_WIDTH 1 #define MADERA_IN3L_PGA_VOL_MASK 0x00FE #define MADERA_IN3L_PGA_VOL_SHIFT 1 -#define MADERA_IN3L_PGA_VOL_WIDTH 7 /* (0x0321) ADC_Digital_Volume_3L */ #define MADERA_IN3L_MUTE 0x0100 #define MADERA_IN3L_MUTE_MASK 0x0100 #define MADERA_IN3L_MUTE_SHIFT 8 -#define MADERA_IN3L_MUTE_WIDTH 1 #define MADERA_IN3L_DIG_VOL_MASK 0x00FF #define MADERA_IN3L_DIG_VOL_SHIFT 0 -#define MADERA_IN3L_DIG_VOL_WIDTH 8 /* (0x0322) DMIC3L_Control */ #define MADERA_IN3_OSR_MASK 0x0700 #define MADERA_IN3_OSR_SHIFT 8 -#define MADERA_IN3_OSR_WIDTH 3 /* (0x0324) IN3R_Control */ #define MADERA_IN3R_HPF_MASK 0x8000 #define MADERA_IN3R_HPF_SHIFT 15 -#define MADERA_IN3R_HPF_WIDTH 1 #define MADERA_IN3R_PGA_VOL_MASK 0x00FE #define MADERA_IN3R_PGA_VOL_SHIFT 1 -#define MADERA_IN3R_PGA_VOL_WIDTH 7 #define MADERA_IN3_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN3_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN3_DMICCLK_SRC_WIDTH 2 /* (0x0325) ADC_Digital_Volume_3R */ #define MADERA_IN3R_MUTE 0x0100 #define MADERA_IN3R_MUTE_MASK 0x0100 #define MADERA_IN3R_MUTE_SHIFT 8 -#define MADERA_IN3R_MUTE_WIDTH 1 #define MADERA_IN3R_DIG_VOL_MASK 0x00FF #define MADERA_IN3R_DIG_VOL_SHIFT 0 -#define MADERA_IN3R_DIG_VOL_WIDTH 8 /* (0x0328) IN4L_Control */ #define MADERA_IN4L_HPF_MASK 0x8000 #define MADERA_IN4L_HPF_SHIFT 15 -#define MADERA_IN4L_HPF_WIDTH 1 #define MADERA_IN4_DMIC_SUP_MASK 0x1800 #define MADERA_IN4_DMIC_SUP_SHIFT 11 -#define MADERA_IN4_DMIC_SUP_WIDTH 2 /* (0x0329) ADC_Digital_Volume_4L */ #define MADERA_IN4L_MUTE 0x0100 #define MADERA_IN4L_MUTE_MASK 0x0100 #define MADERA_IN4L_MUTE_SHIFT 8 -#define MADERA_IN4L_MUTE_WIDTH 1 #define MADERA_IN4L_DIG_VOL_MASK 0x00FF #define MADERA_IN4L_DIG_VOL_SHIFT 0 -#define MADERA_IN4L_DIG_VOL_WIDTH 8 /* (0x032A) DMIC4L_Control */ #define MADERA_IN4_OSR_MASK 0x0700 #define MADERA_IN4_OSR_SHIFT 8 -#define MADERA_IN4_OSR_WIDTH 3 /* (0x032C) IN4R_Control */ #define MADERA_IN4R_HPF_MASK 0x8000 #define MADERA_IN4R_HPF_SHIFT 15 -#define MADERA_IN4R_HPF_WIDTH 1 #define MADERA_IN4_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN4_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN4_DMICCLK_SRC_WIDTH 2 /* (0x032D) ADC_Digital_Volume_4R */ #define MADERA_IN4R_MUTE 0x0100 #define MADERA_IN4R_MUTE_MASK 0x0100 #define MADERA_IN4R_MUTE_SHIFT 8 -#define MADERA_IN4R_MUTE_WIDTH 1 #define MADERA_IN4R_DIG_VOL_MASK 0x00FF #define MADERA_IN4R_DIG_VOL_SHIFT 0 -#define MADERA_IN4R_DIG_VOL_WIDTH 8 /* (0x0330) IN5L_Control */ #define MADERA_IN5L_HPF_MASK 0x8000 #define MADERA_IN5L_HPF_SHIFT 15 -#define MADERA_IN5L_HPF_WIDTH 1 #define MADERA_IN5_DMIC_SUP_MASK 0x1800 #define MADERA_IN5_DMIC_SUP_SHIFT 11 -#define MADERA_IN5_DMIC_SUP_WIDTH 2 /* (0x0331) ADC_Digital_Volume_5L */ #define MADERA_IN5L_MUTE 0x0100 #define MADERA_IN5L_MUTE_MASK 0x0100 #define MADERA_IN5L_MUTE_SHIFT 8 -#define MADERA_IN5L_MUTE_WIDTH 1 #define MADERA_IN5L_DIG_VOL_MASK 0x00FF #define MADERA_IN5L_DIG_VOL_SHIFT 0 -#define MADERA_IN5L_DIG_VOL_WIDTH 8 /* (0x0332) DMIC5L_Control */ #define MADERA_IN5_OSR_MASK 0x0700 #define MADERA_IN5_OSR_SHIFT 8 -#define MADERA_IN5_OSR_WIDTH 3 /* (0x0334) IN5R_Control */ #define MADERA_IN5R_HPF_MASK 0x8000 #define MADERA_IN5R_HPF_SHIFT 15 -#define MADERA_IN5R_HPF_WIDTH 1 #define MADERA_IN5_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN5_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN5_DMICCLK_SRC_WIDTH 2 /* (0x0335) ADC_Digital_Volume_5R */ #define MADERA_IN5R_MUTE 0x0100 #define MADERA_IN5R_MUTE_MASK 0x0100 #define MADERA_IN5R_MUTE_SHIFT 8 -#define MADERA_IN5R_MUTE_WIDTH 1 #define MADERA_IN5R_DIG_VOL_MASK 0x00FF #define MADERA_IN5R_DIG_VOL_SHIFT 0 -#define MADERA_IN5R_DIG_VOL_WIDTH 8 /* (0x0338) IN6L_Control */ #define MADERA_IN6L_HPF_MASK 0x8000 #define MADERA_IN6L_HPF_SHIFT 15 -#define MADERA_IN6L_HPF_WIDTH 1 #define MADERA_IN6_DMIC_SUP_MASK 0x1800 #define MADERA_IN6_DMIC_SUP_SHIFT 11 -#define MADERA_IN6_DMIC_SUP_WIDTH 2 /* (0x0339) ADC_Digital_Volume_6L */ #define MADERA_IN6L_MUTE 0x0100 #define MADERA_IN6L_MUTE_MASK 0x0100 #define MADERA_IN6L_MUTE_SHIFT 8 -#define MADERA_IN6L_MUTE_WIDTH 1 #define MADERA_IN6L_DIG_VOL_MASK 0x00FF #define MADERA_IN6L_DIG_VOL_SHIFT 0 -#define MADERA_IN6L_DIG_VOL_WIDTH 8 /* (0x033A) DMIC6L_Control */ #define MADERA_IN6_OSR_MASK 0x0700 #define MADERA_IN6_OSR_SHIFT 8 -#define MADERA_IN6_OSR_WIDTH 3 /* (0x033C) IN6R_Control */ #define MADERA_IN6R_HPF_MASK 0x8000 #define MADERA_IN6R_HPF_SHIFT 15 -#define MADERA_IN6R_HPF_WIDTH 1 /* (0x033D) ADC_Digital_Volume_6R */ #define MADERA_IN6R_MUTE 0x0100 #define MADERA_IN6R_MUTE_MASK 0x0100 #define MADERA_IN6R_MUTE_SHIFT 8 -#define MADERA_IN6R_MUTE_WIDTH 1 #define MADERA_IN6R_DIG_VOL_MASK 0x00FF #define MADERA_IN6R_DIG_VOL_SHIFT 0 -#define MADERA_IN6R_DIG_VOL_WIDTH 8 /* (0x033E) DMIC6R_Control */ #define MADERA_IN6_DMICCLK_SRC_MASK 0x1800 #define MADERA_IN6_DMICCLK_SRC_SHIFT 11 -#define MADERA_IN6_DMICCLK_SRC_WIDTH 2 /* (0x0400) Output_Enables_1 */ #define MADERA_EP_SEL 0x8000 #define MADERA_EP_SEL_MASK 0x8000 #define MADERA_EP_SEL_SHIFT 15 -#define MADERA_EP_SEL_WIDTH 1 #define MADERA_OUT6L_ENA 0x0800 #define MADERA_OUT6L_ENA_MASK 0x0800 #define MADERA_OUT6L_ENA_SHIFT 11 -#define MADERA_OUT6L_ENA_WIDTH 1 #define MADERA_OUT6R_ENA 0x0400 #define MADERA_OUT6R_ENA_MASK 0x0400 #define MADERA_OUT6R_ENA_SHIFT 10 -#define MADERA_OUT6R_ENA_WIDTH 1 #define MADERA_OUT5L_ENA 0x0200 #define MADERA_OUT5L_ENA_MASK 0x0200 #define MADERA_OUT5L_ENA_SHIFT 9 -#define MADERA_OUT5L_ENA_WIDTH 1 #define MADERA_OUT5R_ENA 0x0100 #define MADERA_OUT5R_ENA_MASK 0x0100 #define MADERA_OUT5R_ENA_SHIFT 8 -#define MADERA_OUT5R_ENA_WIDTH 1 #define MADERA_OUT4L_ENA 0x0080 #define MADERA_OUT4L_ENA_MASK 0x0080 #define MADERA_OUT4L_ENA_SHIFT 7 -#define MADERA_OUT4L_ENA_WIDTH 1 #define MADERA_OUT4R_ENA 0x0040 #define MADERA_OUT4R_ENA_MASK 0x0040 #define MADERA_OUT4R_ENA_SHIFT 6 -#define MADERA_OUT4R_ENA_WIDTH 1 #define MADERA_OUT3L_ENA 0x0020 #define MADERA_OUT3L_ENA_MASK 0x0020 #define MADERA_OUT3L_ENA_SHIFT 5 -#define MADERA_OUT3L_ENA_WIDTH 1 #define MADERA_OUT3R_ENA 0x0010 #define MADERA_OUT3R_ENA_MASK 0x0010 #define MADERA_OUT3R_ENA_SHIFT 4 -#define MADERA_OUT3R_ENA_WIDTH 1 #define MADERA_OUT2L_ENA 0x0008 #define MADERA_OUT2L_ENA_MASK 0x0008 #define MADERA_OUT2L_ENA_SHIFT 3 -#define MADERA_OUT2L_ENA_WIDTH 1 #define MADERA_OUT2R_ENA 0x0004 #define MADERA_OUT2R_ENA_MASK 0x0004 #define MADERA_OUT2R_ENA_SHIFT 2 -#define MADERA_OUT2R_ENA_WIDTH 1 #define MADERA_OUT1L_ENA 0x0002 #define MADERA_OUT1L_ENA_MASK 0x0002 #define MADERA_OUT1L_ENA_SHIFT 1 -#define MADERA_OUT1L_ENA_WIDTH 1 #define MADERA_OUT1R_ENA 0x0001 #define MADERA_OUT1R_ENA_MASK 0x0001 #define MADERA_OUT1R_ENA_SHIFT 0 -#define MADERA_OUT1R_ENA_WIDTH 1 /* (0x0408) Output_Rate_1 */ #define MADERA_CP_DAC_MODE_MASK 0x0040 #define MADERA_CP_DAC_MODE_SHIFT 6 -#define MADERA_CP_DAC_MODE_WIDTH 1 #define MADERA_OUT_EXT_CLK_DIV_MASK 0x0030 #define MADERA_OUT_EXT_CLK_DIV_SHIFT 4 -#define MADERA_OUT_EXT_CLK_DIV_WIDTH 2 #define MADERA_OUT_CLK_SRC_MASK 0x0007 #define MADERA_OUT_CLK_SRC_SHIFT 0 -#define MADERA_OUT_CLK_SRC_WIDTH 3 /* (0x0409) Output_Volume_Ramp */ #define MADERA_OUT_VD_RAMP_MASK 0x0070 #define MADERA_OUT_VD_RAMP_SHIFT 4 -#define MADERA_OUT_VD_RAMP_WIDTH 3 #define MADERA_OUT_VI_RAMP_MASK 0x0007 #define MADERA_OUT_VI_RAMP_SHIFT 0 -#define MADERA_OUT_VI_RAMP_WIDTH 3 /* (0x0410) Output_Path_Config_1L */ #define MADERA_OUT1_MONO 0x1000 #define MADERA_OUT1_MONO_MASK 0x1000 #define MADERA_OUT1_MONO_SHIFT 12 -#define MADERA_OUT1_MONO_WIDTH 1 #define MADERA_OUT1L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT1L_ANC_SRC_SHIFT 10 -#define MADERA_OUT1L_ANC_SRC_WIDTH 2 /* (0x0411) DAC_Digital_Volume_1L */ #define MADERA_OUT1L_VU 0x0200 #define MADERA_OUT1L_VU_MASK 0x0200 #define MADERA_OUT1L_VU_SHIFT 9 -#define MADERA_OUT1L_VU_WIDTH 1 #define MADERA_OUT1L_MUTE 0x0100 #define MADERA_OUT1L_MUTE_MASK 0x0100 #define MADERA_OUT1L_MUTE_SHIFT 8 -#define MADERA_OUT1L_MUTE_WIDTH 1 #define MADERA_OUT1L_VOL_MASK 0x00FF #define MADERA_OUT1L_VOL_SHIFT 0 -#define MADERA_OUT1L_VOL_WIDTH 8 /* (0x0412) Output_Path_Config_1 */ #define MADERA_HP1_GND_SEL_MASK 0x0007 #define MADERA_HP1_GND_SEL_SHIFT 0 -#define MADERA_HP1_GND_SEL_WIDTH 3 /* (0x0414) Output_Path_Config_1R */ #define MADERA_OUT1R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT1R_ANC_SRC_SHIFT 10 -#define MADERA_OUT1R_ANC_SRC_WIDTH 2 /* (0x0415) DAC_Digital_Volume_1R */ #define MADERA_OUT1R_MUTE 0x0100 #define MADERA_OUT1R_MUTE_MASK 0x0100 #define MADERA_OUT1R_MUTE_SHIFT 8 -#define MADERA_OUT1R_MUTE_WIDTH 1 #define MADERA_OUT1R_VOL_MASK 0x00FF #define MADERA_OUT1R_VOL_SHIFT 0 -#define MADERA_OUT1R_VOL_WIDTH 8 /* (0x0418) Output_Path_Config_2L */ #define MADERA_OUT2L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT2L_ANC_SRC_SHIFT 10 -#define MADERA_OUT2L_ANC_SRC_WIDTH 2 /* (0x0419) DAC_Digital_Volume_2L */ #define MADERA_OUT2L_MUTE 0x0100 #define MADERA_OUT2L_MUTE_MASK 0x0100 #define MADERA_OUT2L_MUTE_SHIFT 8 -#define MADERA_OUT2L_MUTE_WIDTH 1 #define MADERA_OUT2L_VOL_MASK 0x00FF #define MADERA_OUT2L_VOL_SHIFT 0 -#define MADERA_OUT2L_VOL_WIDTH 8 /* (0x041A) Output_Path_Config_2 */ #define MADERA_HP2_GND_SEL_MASK 0x0007 #define MADERA_HP2_GND_SEL_SHIFT 0 -#define MADERA_HP2_GND_SEL_WIDTH 3 /* (0x041C) Output_Path_Config_2R */ #define MADERA_OUT2R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT2R_ANC_SRC_SHIFT 10 -#define MADERA_OUT2R_ANC_SRC_WIDTH 2 /* (0x041D) DAC_Digital_Volume_2R */ #define MADERA_OUT2R_MUTE 0x0100 #define MADERA_OUT2R_MUTE_MASK 0x0100 #define MADERA_OUT2R_MUTE_SHIFT 8 -#define MADERA_OUT2R_MUTE_WIDTH 1 #define MADERA_OUT2R_VOL_MASK 0x00FF #define MADERA_OUT2R_VOL_SHIFT 0 -#define MADERA_OUT2R_VOL_WIDTH 8 /* (0x0420) Output_Path_Config_3L */ #define MADERA_OUT3L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT3L_ANC_SRC_SHIFT 10 -#define MADERA_OUT3L_ANC_SRC_WIDTH 2 /* (0x0421) DAC_Digital_Volume_3L */ #define MADERA_OUT3L_MUTE 0x0100 #define MADERA_OUT3L_MUTE_MASK 0x0100 #define MADERA_OUT3L_MUTE_SHIFT 8 -#define MADERA_OUT3L_MUTE_WIDTH 1 #define MADERA_OUT3L_VOL_MASK 0x00FF #define MADERA_OUT3L_VOL_SHIFT 0 -#define MADERA_OUT3L_VOL_WIDTH 8 /* (0x0424) Output_Path_Config_3R */ #define MADERA_OUT3R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT3R_ANC_SRC_SHIFT 10 -#define MADERA_OUT3R_ANC_SRC_WIDTH 2 /* (0x0425) DAC_Digital_Volume_3R */ #define MADERA_OUT3R_MUTE 0x0100 #define MADERA_OUT3R_MUTE_MASK 0x0100 #define MADERA_OUT3R_MUTE_SHIFT 8 -#define MADERA_OUT3R_MUTE_WIDTH 1 #define MADERA_OUT3R_VOL_MASK 0x00FF #define MADERA_OUT3R_VOL_SHIFT 0 -#define MADERA_OUT3R_VOL_WIDTH 8 /* (0x0428) Output_Path_Config_4L */ #define MADERA_OUT4L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT4L_ANC_SRC_SHIFT 10 -#define MADERA_OUT4L_ANC_SRC_WIDTH 2 /* (0x0429) DAC_Digital_Volume_4L */ #define MADERA_OUT4L_MUTE 0x0100 #define MADERA_OUT4L_MUTE_MASK 0x0100 #define MADERA_OUT4L_MUTE_SHIFT 8 -#define MADERA_OUT4L_MUTE_WIDTH 1 #define MADERA_OUT4L_VOL_MASK 0x00FF #define MADERA_OUT4L_VOL_SHIFT 0 -#define MADERA_OUT4L_VOL_WIDTH 8 /* (0x042C) Output_Path_Config_4R */ #define MADERA_OUT4R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT4R_ANC_SRC_SHIFT 10 -#define MADERA_OUT4R_ANC_SRC_WIDTH 2 /* (0x042D) DAC_Digital_Volume_4R */ #define MADERA_OUT4R_MUTE 0x0100 #define MADERA_OUT4R_MUTE_MASK 0x0100 #define MADERA_OUT4R_MUTE_SHIFT 8 -#define MADERA_OUT4R_MUTE_WIDTH 1 #define MADERA_OUT4R_VOL_MASK 0x00FF #define MADERA_OUT4R_VOL_SHIFT 0 -#define MADERA_OUT4R_VOL_WIDTH 8 /* (0x0430) Output_Path_Config_5L */ #define MADERA_OUT5_OSR 0x2000 #define MADERA_OUT5_OSR_MASK 0x2000 #define MADERA_OUT5_OSR_SHIFT 13 -#define MADERA_OUT5_OSR_WIDTH 1 #define MADERA_OUT5L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT5L_ANC_SRC_SHIFT 10 -#define MADERA_OUT5L_ANC_SRC_WIDTH 2 /* (0x0431) DAC_Digital_Volume_5L */ #define MADERA_OUT5L_MUTE 0x0100 #define MADERA_OUT5L_MUTE_MASK 0x0100 #define MADERA_OUT5L_MUTE_SHIFT 8 -#define MADERA_OUT5L_MUTE_WIDTH 1 #define MADERA_OUT5L_VOL_MASK 0x00FF #define MADERA_OUT5L_VOL_SHIFT 0 -#define MADERA_OUT5L_VOL_WIDTH 8 /* (0x0434) Output_Path_Config_5R */ #define MADERA_OUT5R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT5R_ANC_SRC_SHIFT 10 -#define MADERA_OUT5R_ANC_SRC_WIDTH 2 /* (0x0435) DAC_Digital_Volume_5R */ #define MADERA_OUT5R_MUTE 0x0100 #define MADERA_OUT5R_MUTE_MASK 0x0100 #define MADERA_OUT5R_MUTE_SHIFT 8 -#define MADERA_OUT5R_MUTE_WIDTH 1 #define MADERA_OUT5R_VOL_MASK 0x00FF #define MADERA_OUT5R_VOL_SHIFT 0 -#define MADERA_OUT5R_VOL_WIDTH 8 /* (0x0438) Output_Path_Config_6L */ #define MADERA_OUT6_OSR 0x2000 #define MADERA_OUT6_OSR_MASK 0x2000 #define MADERA_OUT6_OSR_SHIFT 13 -#define MADERA_OUT6_OSR_WIDTH 1 #define MADERA_OUT6L_ANC_SRC_MASK 0x0C00 #define MADERA_OUT6L_ANC_SRC_SHIFT 10 -#define MADERA_OUT6L_ANC_SRC_WIDTH 2 /* (0x0439) DAC_Digital_Volume_6L */ #define MADERA_OUT6L_MUTE 0x0100 #define MADERA_OUT6L_MUTE_MASK 0x0100 #define MADERA_OUT6L_MUTE_SHIFT 8 -#define MADERA_OUT6L_MUTE_WIDTH 1 #define MADERA_OUT6L_VOL_MASK 0x00FF #define MADERA_OUT6L_VOL_SHIFT 0 -#define MADERA_OUT6L_VOL_WIDTH 8 /* (0x043C) Output_Path_Config_6R */ #define MADERA_OUT6R_ANC_SRC_MASK 0x0C00 #define MADERA_OUT6R_ANC_SRC_SHIFT 10 -#define MADERA_OUT6R_ANC_SRC_WIDTH 2 /* (0x043D) DAC_Digital_Volume_6R */ #define MADERA_OUT6R_MUTE 0x0100 #define MADERA_OUT6R_MUTE_MASK 0x0100 #define MADERA_OUT6R_MUTE_SHIFT 8 -#define MADERA_OUT6R_MUTE_WIDTH 1 #define MADERA_OUT6R_VOL_MASK 0x00FF #define MADERA_OUT6R_VOL_SHIFT 0 -#define MADERA_OUT6R_VOL_WIDTH 8 /* (0x0450) - DAC AEC Control 1 */ #define MADERA_AEC1_LOOPBACK_SRC_MASK 0x003C #define MADERA_AEC1_LOOPBACK_SRC_SHIFT 2 -#define MADERA_AEC1_LOOPBACK_SRC_WIDTH 4 #define MADERA_AEC1_ENA_STS 0x0002 #define MADERA_AEC1_ENA_STS_MASK 0x0002 #define MADERA_AEC1_ENA_STS_SHIFT 1 -#define MADERA_AEC1_ENA_STS_WIDTH 1 #define MADERA_AEC1_LOOPBACK_ENA 0x0001 #define MADERA_AEC1_LOOPBACK_ENA_MASK 0x0001 #define MADERA_AEC1_LOOPBACK_ENA_SHIFT 0 -#define MADERA_AEC1_LOOPBACK_ENA_WIDTH 1 /* (0x0451) DAC_AEC_Control_2 */ #define MADERA_AEC2_LOOPBACK_SRC_MASK 0x003C #define MADERA_AEC2_LOOPBACK_SRC_SHIFT 2 -#define MADERA_AEC2_LOOPBACK_SRC_WIDTH 4 #define MADERA_AEC2_ENA_STS 0x0002 #define MADERA_AEC2_ENA_STS_MASK 0x0002 #define MADERA_AEC2_ENA_STS_SHIFT 1 -#define MADERA_AEC2_ENA_STS_WIDTH 1 #define MADERA_AEC2_LOOPBACK_ENA 0x0001 #define MADERA_AEC2_LOOPBACK_ENA_MASK 0x0001 #define MADERA_AEC2_LOOPBACK_ENA_SHIFT 0 -#define MADERA_AEC2_LOOPBACK_ENA_WIDTH 1 /* (0x0458) Noise_Gate_Control */ #define MADERA_NGATE_HOLD_MASK 0x0030 #define MADERA_NGATE_HOLD_SHIFT 4 -#define MADERA_NGATE_HOLD_WIDTH 2 #define MADERA_NGATE_THR_MASK 0x000E #define MADERA_NGATE_THR_SHIFT 1 -#define MADERA_NGATE_THR_WIDTH 3 #define MADERA_NGATE_ENA 0x0001 #define MADERA_NGATE_ENA_MASK 0x0001 #define MADERA_NGATE_ENA_SHIFT 0 -#define MADERA_NGATE_ENA_WIDTH 1 /* (0x0490) PDM_SPK1_CTRL_1 */ #define MADERA_SPK1R_MUTE 0x2000 #define MADERA_SPK1R_MUTE_MASK 0x2000 #define MADERA_SPK1R_MUTE_SHIFT 13 -#define MADERA_SPK1R_MUTE_WIDTH 1 #define MADERA_SPK1L_MUTE 0x1000 #define MADERA_SPK1L_MUTE_MASK 0x1000 #define MADERA_SPK1L_MUTE_SHIFT 12 -#define MADERA_SPK1L_MUTE_WIDTH 1 #define MADERA_SPK1_MUTE_ENDIAN 0x0100 #define MADERA_SPK1_MUTE_ENDIAN_MASK 0x0100 #define MADERA_SPK1_MUTE_ENDIAN_SHIFT 8 -#define MADERA_SPK1_MUTE_ENDIAN_WIDTH 1 #define MADERA_SPK1_MUTE_SEQ1_MASK 0x00FF #define MADERA_SPK1_MUTE_SEQ1_SHIFT 0 -#define MADERA_SPK1_MUTE_SEQ1_WIDTH 8 /* (0x0491) PDM_SPK1_CTRL_2 */ #define MADERA_SPK1_FMT 0x0001 #define MADERA_SPK1_FMT_MASK 0x0001 #define MADERA_SPK1_FMT_SHIFT 0 -#define MADERA_SPK1_FMT_WIDTH 1 /* (0x0492) PDM_SPK2_CTRL_1 */ #define MADERA_SPK2R_MUTE 0x2000 #define MADERA_SPK2R_MUTE_MASK 0x2000 #define MADERA_SPK2R_MUTE_SHIFT 13 -#define MADERA_SPK2R_MUTE_WIDTH 1 #define MADERA_SPK2L_MUTE 0x1000 #define MADERA_SPK2L_MUTE_MASK 0x1000 #define MADERA_SPK2L_MUTE_SHIFT 12 -#define MADERA_SPK2L_MUTE_WIDTH 1 /* (0x04A0) - HP1 Short Circuit Ctrl */ #define MADERA_HP1_SC_ENA 0x1000 #define MADERA_HP1_SC_ENA_MASK 0x1000 #define MADERA_HP1_SC_ENA_SHIFT 12 -#define MADERA_HP1_SC_ENA_WIDTH 1 /* (0x04A1) - HP2 Short Circuit Ctrl */ #define MADERA_HP2_SC_ENA 0x1000 #define MADERA_HP2_SC_ENA_MASK 0x1000 #define MADERA_HP2_SC_ENA_SHIFT 12 -#define MADERA_HP2_SC_ENA_WIDTH 1 /* (0x04A2) - HP3 Short Circuit Ctrl */ #define MADERA_HP3_SC_ENA 0x1000 #define MADERA_HP3_SC_ENA_MASK 0x1000 #define MADERA_HP3_SC_ENA_SHIFT 12 -#define MADERA_HP3_SC_ENA_WIDTH 1 /* (0x04A8) - HP_Test_Ctrl_5 */ #define MADERA_HP1L_ONEFLT 0x0100 #define MADERA_HP1L_ONEFLT_MASK 0x0100 #define MADERA_HP1L_ONEFLT_SHIFT 8 -#define MADERA_HP1L_ONEFLT_WIDTH 1 /* (0x04A9) - HP_Test_Ctrl_6 */ #define MADERA_HP1R_ONEFLT 0x0100 #define MADERA_HP1R_ONEFLT_MASK 0x0100 #define MADERA_HP1R_ONEFLT_SHIFT 8 -#define MADERA_HP1R_ONEFLT_WIDTH 1 /* (0x0500) AIF1_BCLK_Ctrl */ #define MADERA_AIF1_BCLK_INV 0x0080 #define MADERA_AIF1_BCLK_INV_MASK 0x0080 #define MADERA_AIF1_BCLK_INV_SHIFT 7 -#define MADERA_AIF1_BCLK_INV_WIDTH 1 #define MADERA_AIF1_BCLK_MSTR 0x0020 #define MADERA_AIF1_BCLK_MSTR_MASK 0x0020 #define MADERA_AIF1_BCLK_MSTR_SHIFT 5 -#define MADERA_AIF1_BCLK_MSTR_WIDTH 1 #define MADERA_AIF1_BCLK_FREQ_MASK 0x001F #define MADERA_AIF1_BCLK_FREQ_SHIFT 0 -#define MADERA_AIF1_BCLK_FREQ_WIDTH 5 /* (0x0501) AIF1_Tx_Pin_Ctrl */ #define MADERA_AIF1TX_LRCLK_SRC 0x0008 #define MADERA_AIF1TX_LRCLK_SRC_MASK 0x0008 #define MADERA_AIF1TX_LRCLK_SRC_SHIFT 3 -#define MADERA_AIF1TX_LRCLK_SRC_WIDTH 1 #define MADERA_AIF1TX_LRCLK_INV 0x0004 #define MADERA_AIF1TX_LRCLK_INV_MASK 0x0004 #define MADERA_AIF1TX_LRCLK_INV_SHIFT 2 -#define MADERA_AIF1TX_LRCLK_INV_WIDTH 1 #define MADERA_AIF1TX_LRCLK_MSTR 0x0001 #define MADERA_AIF1TX_LRCLK_MSTR_MASK 0x0001 #define MADERA_AIF1TX_LRCLK_MSTR_SHIFT 0 -#define MADERA_AIF1TX_LRCLK_MSTR_WIDTH 1 /* (0x0502) AIF1_Rx_Pin_Ctrl */ #define MADERA_AIF1RX_LRCLK_INV 0x0004 #define MADERA_AIF1RX_LRCLK_INV_MASK 0x0004 #define MADERA_AIF1RX_LRCLK_INV_SHIFT 2 -#define MADERA_AIF1RX_LRCLK_INV_WIDTH 1 #define MADERA_AIF1RX_LRCLK_FRC 0x0002 #define MADERA_AIF1RX_LRCLK_FRC_MASK 0x0002 #define MADERA_AIF1RX_LRCLK_FRC_SHIFT 1 -#define MADERA_AIF1RX_LRCLK_FRC_WIDTH 1 #define MADERA_AIF1RX_LRCLK_MSTR 0x0001 #define MADERA_AIF1RX_LRCLK_MSTR_MASK 0x0001 #define MADERA_AIF1RX_LRCLK_MSTR_SHIFT 0 -#define MADERA_AIF1RX_LRCLK_MSTR_WIDTH 1 /* (0x0503) AIF1_Rate_Ctrl */ #define MADERA_AIF1_RATE_MASK 0xF800 #define MADERA_AIF1_RATE_SHIFT 11 -#define MADERA_AIF1_RATE_WIDTH 5 #define MADERA_AIF1_TRI 0x0040 #define MADERA_AIF1_TRI_MASK 0x0040 #define MADERA_AIF1_TRI_SHIFT 6 -#define MADERA_AIF1_TRI_WIDTH 1 /* (0x0504) AIF1_Format */ #define MADERA_AIF1_FMT_MASK 0x0007 #define MADERA_AIF1_FMT_SHIFT 0 -#define MADERA_AIF1_FMT_WIDTH 3 /* (0x0506) AIF1_Rx_BCLK_Rate */ #define MADERA_AIF1RX_BCPF_MASK 0x1FFF #define MADERA_AIF1RX_BCPF_SHIFT 0 -#define MADERA_AIF1RX_BCPF_WIDTH 13 /* (0x0507) AIF1_Frame_Ctrl_1 */ #define MADERA_AIF1TX_WL_MASK 0x3F00 #define MADERA_AIF1TX_WL_SHIFT 8 -#define MADERA_AIF1TX_WL_WIDTH 6 #define MADERA_AIF1TX_SLOT_LEN_MASK 0x00FF #define MADERA_AIF1TX_SLOT_LEN_SHIFT 0 -#define MADERA_AIF1TX_SLOT_LEN_WIDTH 8 /* (0x0508) AIF1_Frame_Ctrl_2 */ #define MADERA_AIF1RX_WL_MASK 0x3F00 #define MADERA_AIF1RX_WL_SHIFT 8 -#define MADERA_AIF1RX_WL_WIDTH 6 #define MADERA_AIF1RX_SLOT_LEN_MASK 0x00FF #define MADERA_AIF1RX_SLOT_LEN_SHIFT 0 -#define MADERA_AIF1RX_SLOT_LEN_WIDTH 8 /* (0x0509) AIF1_Frame_Ctrl_3 */ #define MADERA_AIF1TX1_SLOT_MASK 0x003F #define MADERA_AIF1TX1_SLOT_SHIFT 0 -#define MADERA_AIF1TX1_SLOT_WIDTH 6 /* (0x0519) AIF1_Tx_Enables */ #define MADERA_AIF1TX8_ENA 0x0080 #define MADERA_AIF1TX8_ENA_MASK 0x0080 #define MADERA_AIF1TX8_ENA_SHIFT 7 -#define MADERA_AIF1TX8_ENA_WIDTH 1 #define MADERA_AIF1TX7_ENA 0x0040 #define MADERA_AIF1TX7_ENA_MASK 0x0040 #define MADERA_AIF1TX7_ENA_SHIFT 6 -#define MADERA_AIF1TX7_ENA_WIDTH 1 #define MADERA_AIF1TX6_ENA 0x0020 #define MADERA_AIF1TX6_ENA_MASK 0x0020 #define MADERA_AIF1TX6_ENA_SHIFT 5 -#define MADERA_AIF1TX6_ENA_WIDTH 1 #define MADERA_AIF1TX5_ENA 0x0010 #define MADERA_AIF1TX5_ENA_MASK 0x0010 #define MADERA_AIF1TX5_ENA_SHIFT 4 -#define MADERA_AIF1TX5_ENA_WIDTH 1 #define MADERA_AIF1TX4_ENA 0x0008 #define MADERA_AIF1TX4_ENA_MASK 0x0008 #define MADERA_AIF1TX4_ENA_SHIFT 3 -#define MADERA_AIF1TX4_ENA_WIDTH 1 #define MADERA_AIF1TX3_ENA 0x0004 #define MADERA_AIF1TX3_ENA_MASK 0x0004 #define MADERA_AIF1TX3_ENA_SHIFT 2 -#define MADERA_AIF1TX3_ENA_WIDTH 1 #define MADERA_AIF1TX2_ENA 0x0002 #define MADERA_AIF1TX2_ENA_MASK 0x0002 #define MADERA_AIF1TX2_ENA_SHIFT 1 -#define MADERA_AIF1TX2_ENA_WIDTH 1 #define MADERA_AIF1TX1_ENA 0x0001 #define MADERA_AIF1TX1_ENA_MASK 0x0001 #define MADERA_AIF1TX1_ENA_SHIFT 0 -#define MADERA_AIF1TX1_ENA_WIDTH 1 /* (0x051A) AIF1_Rx_Enables */ #define MADERA_AIF1RX8_ENA 0x0080 #define MADERA_AIF1RX8_ENA_MASK 0x0080 #define MADERA_AIF1RX8_ENA_SHIFT 7 -#define MADERA_AIF1RX8_ENA_WIDTH 1 #define MADERA_AIF1RX7_ENA 0x0040 #define MADERA_AIF1RX7_ENA_MASK 0x0040 #define MADERA_AIF1RX7_ENA_SHIFT 6 -#define MADERA_AIF1RX7_ENA_WIDTH 1 #define MADERA_AIF1RX6_ENA 0x0020 #define MADERA_AIF1RX6_ENA_MASK 0x0020 #define MADERA_AIF1RX6_ENA_SHIFT 5 -#define MADERA_AIF1RX6_ENA_WIDTH 1 #define MADERA_AIF1RX5_ENA 0x0010 #define MADERA_AIF1RX5_ENA_MASK 0x0010 #define MADERA_AIF1RX5_ENA_SHIFT 4 -#define MADERA_AIF1RX5_ENA_WIDTH 1 #define MADERA_AIF1RX4_ENA 0x0008 #define MADERA_AIF1RX4_ENA_MASK 0x0008 #define MADERA_AIF1RX4_ENA_SHIFT 3 -#define MADERA_AIF1RX4_ENA_WIDTH 1 #define MADERA_AIF1RX3_ENA 0x0004 #define MADERA_AIF1RX3_ENA_MASK 0x0004 #define MADERA_AIF1RX3_ENA_SHIFT 2 -#define MADERA_AIF1RX3_ENA_WIDTH 1 #define MADERA_AIF1RX2_ENA 0x0002 #define MADERA_AIF1RX2_ENA_MASK 0x0002 #define MADERA_AIF1RX2_ENA_SHIFT 1 -#define MADERA_AIF1RX2_ENA_WIDTH 1 #define MADERA_AIF1RX1_ENA 0x0001 #define MADERA_AIF1RX1_ENA_MASK 0x0001 #define MADERA_AIF1RX1_ENA_SHIFT 0 -#define MADERA_AIF1RX1_ENA_WIDTH 1 /* (0x0559) AIF2_Tx_Enables */ #define MADERA_AIF2TX8_ENA 0x0080 #define MADERA_AIF2TX8_ENA_MASK 0x0080 #define MADERA_AIF2TX8_ENA_SHIFT 7 -#define MADERA_AIF2TX8_ENA_WIDTH 1 #define MADERA_AIF2TX7_ENA 0x0040 #define MADERA_AIF2TX7_ENA_MASK 0x0040 #define MADERA_AIF2TX7_ENA_SHIFT 6 -#define MADERA_AIF2TX7_ENA_WIDTH 1 #define MADERA_AIF2TX6_ENA 0x0020 #define MADERA_AIF2TX6_ENA_MASK 0x0020 #define MADERA_AIF2TX6_ENA_SHIFT 5 -#define MADERA_AIF2TX6_ENA_WIDTH 1 #define MADERA_AIF2TX5_ENA 0x0010 #define MADERA_AIF2TX5_ENA_MASK 0x0010 #define MADERA_AIF2TX5_ENA_SHIFT 4 -#define MADERA_AIF2TX5_ENA_WIDTH 1 #define MADERA_AIF2TX4_ENA 0x0008 #define MADERA_AIF2TX4_ENA_MASK 0x0008 #define MADERA_AIF2TX4_ENA_SHIFT 3 -#define MADERA_AIF2TX4_ENA_WIDTH 1 #define MADERA_AIF2TX3_ENA 0x0004 #define MADERA_AIF2TX3_ENA_MASK 0x0004 #define MADERA_AIF2TX3_ENA_SHIFT 2 -#define MADERA_AIF2TX3_ENA_WIDTH 1 #define MADERA_AIF2TX2_ENA 0x0002 #define MADERA_AIF2TX2_ENA_MASK 0x0002 #define MADERA_AIF2TX2_ENA_SHIFT 1 -#define MADERA_AIF2TX2_ENA_WIDTH 1 #define MADERA_AIF2TX1_ENA 0x0001 #define MADERA_AIF2TX1_ENA_MASK 0x0001 #define MADERA_AIF2TX1_ENA_SHIFT 0 -#define MADERA_AIF2TX1_ENA_WIDTH 1 /* (0x055A) AIF2_Rx_Enables */ #define MADERA_AIF2RX8_ENA 0x0080 #define MADERA_AIF2RX8_ENA_MASK 0x0080 #define MADERA_AIF2RX8_ENA_SHIFT 7 -#define MADERA_AIF2RX8_ENA_WIDTH 1 #define MADERA_AIF2RX7_ENA 0x0040 #define MADERA_AIF2RX7_ENA_MASK 0x0040 #define MADERA_AIF2RX7_ENA_SHIFT 6 -#define MADERA_AIF2RX7_ENA_WIDTH 1 #define MADERA_AIF2RX6_ENA 0x0020 #define MADERA_AIF2RX6_ENA_MASK 0x0020 #define MADERA_AIF2RX6_ENA_SHIFT 5 -#define MADERA_AIF2RX6_ENA_WIDTH 1 #define MADERA_AIF2RX5_ENA 0x0010 #define MADERA_AIF2RX5_ENA_MASK 0x0010 #define MADERA_AIF2RX5_ENA_SHIFT 4 -#define MADERA_AIF2RX5_ENA_WIDTH 1 #define MADERA_AIF2RX4_ENA 0x0008 #define MADERA_AIF2RX4_ENA_MASK 0x0008 #define MADERA_AIF2RX4_ENA_SHIFT 3 -#define MADERA_AIF2RX4_ENA_WIDTH 1 #define MADERA_AIF2RX3_ENA 0x0004 #define MADERA_AIF2RX3_ENA_MASK 0x0004 #define MADERA_AIF2RX3_ENA_SHIFT 2 -#define MADERA_AIF2RX3_ENA_WIDTH 1 #define MADERA_AIF2RX2_ENA 0x0002 #define MADERA_AIF2RX2_ENA_MASK 0x0002 #define MADERA_AIF2RX2_ENA_SHIFT 1 -#define MADERA_AIF2RX2_ENA_WIDTH 1 #define MADERA_AIF2RX1_ENA 0x0001 #define MADERA_AIF2RX1_ENA_MASK 0x0001 #define MADERA_AIF2RX1_ENA_SHIFT 0 -#define MADERA_AIF2RX1_ENA_WIDTH 1 /* (0x0599) AIF3_Tx_Enables */ #define MADERA_AIF3TX8_ENA 0x0080 #define MADERA_AIF3TX8_ENA_MASK 0x0080 #define MADERA_AIF3TX8_ENA_SHIFT 7 -#define MADERA_AIF3TX8_ENA_WIDTH 1 #define MADERA_AIF3TX7_ENA 0x0040 #define MADERA_AIF3TX7_ENA_MASK 0x0040 #define MADERA_AIF3TX7_ENA_SHIFT 6 -#define MADERA_AIF3TX7_ENA_WIDTH 1 #define MADERA_AIF3TX6_ENA 0x0020 #define MADERA_AIF3TX6_ENA_MASK 0x0020 #define MADERA_AIF3TX6_ENA_SHIFT 5 -#define MADERA_AIF3TX6_ENA_WIDTH 1 #define MADERA_AIF3TX5_ENA 0x0010 #define MADERA_AIF3TX5_ENA_MASK 0x0010 #define MADERA_AIF3TX5_ENA_SHIFT 4 -#define MADERA_AIF3TX5_ENA_WIDTH 1 #define MADERA_AIF3TX4_ENA 0x0008 #define MADERA_AIF3TX4_ENA_MASK 0x0008 #define MADERA_AIF3TX4_ENA_SHIFT 3 -#define MADERA_AIF3TX4_ENA_WIDTH 1 #define MADERA_AIF3TX3_ENA 0x0004 #define MADERA_AIF3TX3_ENA_MASK 0x0004 #define MADERA_AIF3TX3_ENA_SHIFT 2 -#define MADERA_AIF3TX3_ENA_WIDTH 1 #define MADERA_AIF3TX2_ENA 0x0002 #define MADERA_AIF3TX2_ENA_MASK 0x0002 #define MADERA_AIF3TX2_ENA_SHIFT 1 -#define MADERA_AIF3TX2_ENA_WIDTH 1 #define MADERA_AIF3TX1_ENA 0x0001 #define MADERA_AIF3TX1_ENA_MASK 0x0001 #define MADERA_AIF3TX1_ENA_SHIFT 0 -#define MADERA_AIF3TX1_ENA_WIDTH 1 /* (0x059A) AIF3_Rx_Enables */ #define MADERA_AIF3RX8_ENA 0x0080 #define MADERA_AIF3RX8_ENA_MASK 0x0080 #define MADERA_AIF3RX8_ENA_SHIFT 7 -#define MADERA_AIF3RX8_ENA_WIDTH 1 #define MADERA_AIF3RX7_ENA 0x0040 #define MADERA_AIF3RX7_ENA_MASK 0x0040 #define MADERA_AIF3RX7_ENA_SHIFT 6 -#define MADERA_AIF3RX7_ENA_WIDTH 1 #define MADERA_AIF3RX6_ENA 0x0020 #define MADERA_AIF3RX6_ENA_MASK 0x0020 #define MADERA_AIF3RX6_ENA_SHIFT 5 -#define MADERA_AIF3RX6_ENA_WIDTH 1 #define MADERA_AIF3RX5_ENA 0x0010 #define MADERA_AIF3RX5_ENA_MASK 0x0010 #define MADERA_AIF3RX5_ENA_SHIFT 4 -#define MADERA_AIF3RX5_ENA_WIDTH 1 #define MADERA_AIF3RX4_ENA 0x0008 #define MADERA_AIF3RX4_ENA_MASK 0x0008 #define MADERA_AIF3RX4_ENA_SHIFT 3 -#define MADERA_AIF3RX4_ENA_WIDTH 1 #define MADERA_AIF3RX3_ENA 0x0004 #define MADERA_AIF3RX3_ENA_MASK 0x0004 #define MADERA_AIF3RX3_ENA_SHIFT 2 -#define MADERA_AIF3RX3_ENA_WIDTH 1 #define MADERA_AIF3RX2_ENA 0x0002 #define MADERA_AIF3RX2_ENA_MASK 0x0002 #define MADERA_AIF3RX2_ENA_SHIFT 1 -#define MADERA_AIF3RX2_ENA_WIDTH 1 #define MADERA_AIF3RX1_ENA 0x0001 #define MADERA_AIF3RX1_ENA_MASK 0x0001 #define MADERA_AIF3RX1_ENA_SHIFT 0 -#define MADERA_AIF3RX1_ENA_WIDTH 1 /* (0x05B9) AIF4_Tx_Enables */ #define MADERA_AIF4TX2_ENA 0x0002 #define MADERA_AIF4TX2_ENA_MASK 0x0002 #define MADERA_AIF4TX2_ENA_SHIFT 1 -#define MADERA_AIF4TX2_ENA_WIDTH 1 #define MADERA_AIF4TX1_ENA 0x0001 #define MADERA_AIF4TX1_ENA_MASK 0x0001 #define MADERA_AIF4TX1_ENA_SHIFT 0 -#define MADERA_AIF4TX1_ENA_WIDTH 1 /* (0x05BA) AIF4_Rx_Enables */ #define MADERA_AIF4RX2_ENA 0x0002 #define MADERA_AIF4RX2_ENA_MASK 0x0002 #define MADERA_AIF4RX2_ENA_SHIFT 1 -#define MADERA_AIF4RX2_ENA_WIDTH 1 #define MADERA_AIF4RX1_ENA 0x0001 #define MADERA_AIF4RX1_ENA_MASK 0x0001 #define MADERA_AIF4RX1_ENA_SHIFT 0 -#define MADERA_AIF4RX1_ENA_WIDTH 1 /* (0x05C2) SPD1_TX_Control */ #define MADERA_SPD1_VAL2 0x2000 #define MADERA_SPD1_VAL2_MASK 0x2000 #define MADERA_SPD1_VAL2_SHIFT 13 -#define MADERA_SPD1_VAL2_WIDTH 1 #define MADERA_SPD1_VAL1 0x1000 #define MADERA_SPD1_VAL1_MASK 0x1000 #define MADERA_SPD1_VAL1_SHIFT 12 -#define MADERA_SPD1_VAL1_WIDTH 1 #define MADERA_SPD1_RATE_MASK 0x00F0 #define MADERA_SPD1_RATE_SHIFT 4 -#define MADERA_SPD1_RATE_WIDTH 4 #define MADERA_SPD1_ENA 0x0001 #define MADERA_SPD1_ENA_MASK 0x0001 #define MADERA_SPD1_ENA_SHIFT 0 -#define MADERA_SPD1_ENA_WIDTH 1 /* (0x05F5) SLIMbus_RX_Channel_Enable */ #define MADERA_SLIMRX8_ENA 0x0080 #define MADERA_SLIMRX8_ENA_MASK 0x0080 #define MADERA_SLIMRX8_ENA_SHIFT 7 -#define MADERA_SLIMRX8_ENA_WIDTH 1 #define MADERA_SLIMRX7_ENA 0x0040 #define MADERA_SLIMRX7_ENA_MASK 0x0040 #define MADERA_SLIMRX7_ENA_SHIFT 6 -#define MADERA_SLIMRX7_ENA_WIDTH 1 #define MADERA_SLIMRX6_ENA 0x0020 #define MADERA_SLIMRX6_ENA_MASK 0x0020 #define MADERA_SLIMRX6_ENA_SHIFT 5 -#define MADERA_SLIMRX6_ENA_WIDTH 1 #define MADERA_SLIMRX5_ENA 0x0010 #define MADERA_SLIMRX5_ENA_MASK 0x0010 #define MADERA_SLIMRX5_ENA_SHIFT 4 -#define MADERA_SLIMRX5_ENA_WIDTH 1 #define MADERA_SLIMRX4_ENA 0x0008 #define MADERA_SLIMRX4_ENA_MASK 0x0008 #define MADERA_SLIMRX4_ENA_SHIFT 3 -#define MADERA_SLIMRX4_ENA_WIDTH 1 #define MADERA_SLIMRX3_ENA 0x0004 #define MADERA_SLIMRX3_ENA_MASK 0x0004 #define MADERA_SLIMRX3_ENA_SHIFT 2 -#define MADERA_SLIMRX3_ENA_WIDTH 1 #define MADERA_SLIMRX2_ENA 0x0002 #define MADERA_SLIMRX2_ENA_MASK 0x0002 #define MADERA_SLIMRX2_ENA_SHIFT 1 -#define MADERA_SLIMRX2_ENA_WIDTH 1 #define MADERA_SLIMRX1_ENA 0x0001 #define MADERA_SLIMRX1_ENA_MASK 0x0001 #define MADERA_SLIMRX1_ENA_SHIFT 0 -#define MADERA_SLIMRX1_ENA_WIDTH 1 /* (0x05F6) SLIMbus_TX_Channel_Enable */ #define MADERA_SLIMTX8_ENA 0x0080 #define MADERA_SLIMTX8_ENA_MASK 0x0080 #define MADERA_SLIMTX8_ENA_SHIFT 7 -#define MADERA_SLIMTX8_ENA_WIDTH 1 #define MADERA_SLIMTX7_ENA 0x0040 #define MADERA_SLIMTX7_ENA_MASK 0x0040 #define MADERA_SLIMTX7_ENA_SHIFT 6 -#define MADERA_SLIMTX7_ENA_WIDTH 1 #define MADERA_SLIMTX6_ENA 0x0020 #define MADERA_SLIMTX6_ENA_MASK 0x0020 #define MADERA_SLIMTX6_ENA_SHIFT 5 -#define MADERA_SLIMTX6_ENA_WIDTH 1 #define MADERA_SLIMTX5_ENA 0x0010 #define MADERA_SLIMTX5_ENA_MASK 0x0010 #define MADERA_SLIMTX5_ENA_SHIFT 4 -#define MADERA_SLIMTX5_ENA_WIDTH 1 #define MADERA_SLIMTX4_ENA 0x0008 #define MADERA_SLIMTX4_ENA_MASK 0x0008 #define MADERA_SLIMTX4_ENA_SHIFT 3 -#define MADERA_SLIMTX4_ENA_WIDTH 1 #define MADERA_SLIMTX3_ENA 0x0004 #define MADERA_SLIMTX3_ENA_MASK 0x0004 #define MADERA_SLIMTX3_ENA_SHIFT 2 -#define MADERA_SLIMTX3_ENA_WIDTH 1 #define MADERA_SLIMTX2_ENA 0x0002 #define MADERA_SLIMTX2_ENA_MASK 0x0002 #define MADERA_SLIMTX2_ENA_SHIFT 1 -#define MADERA_SLIMTX2_ENA_WIDTH 1 #define MADERA_SLIMTX1_ENA 0x0001 #define MADERA_SLIMTX1_ENA_MASK 0x0001 #define MADERA_SLIMTX1_ENA_SHIFT 0 -#define MADERA_SLIMTX1_ENA_WIDTH 1 /* (0x0E10) EQ1_1 */ #define MADERA_EQ1_B1_GAIN_MASK 0xF800 #define MADERA_EQ1_B1_GAIN_SHIFT 11 -#define MADERA_EQ1_B1_GAIN_WIDTH 5 #define MADERA_EQ1_B2_GAIN_MASK 0x07C0 #define MADERA_EQ1_B2_GAIN_SHIFT 6 -#define MADERA_EQ1_B2_GAIN_WIDTH 5 #define MADERA_EQ1_B3_GAIN_MASK 0x003E #define MADERA_EQ1_B3_GAIN_SHIFT 1 -#define MADERA_EQ1_B3_GAIN_WIDTH 5 #define MADERA_EQ1_ENA 0x0001 #define MADERA_EQ1_ENA_MASK 0x0001 #define MADERA_EQ1_ENA_SHIFT 0 -#define MADERA_EQ1_ENA_WIDTH 1 /* (0x0E11) EQ1_2 */ #define MADERA_EQ1_B4_GAIN_MASK 0xF800 #define MADERA_EQ1_B4_GAIN_SHIFT 11 -#define MADERA_EQ1_B4_GAIN_WIDTH 5 #define MADERA_EQ1_B5_GAIN_MASK 0x07C0 #define MADERA_EQ1_B5_GAIN_SHIFT 6 -#define MADERA_EQ1_B5_GAIN_WIDTH 5 #define MADERA_EQ1_B1_MODE 0x0001 #define MADERA_EQ1_B1_MODE_MASK 0x0001 #define MADERA_EQ1_B1_MODE_SHIFT 0 -#define MADERA_EQ1_B1_MODE_WIDTH 1 /* (0x0E26) EQ2_1 */ #define MADERA_EQ2_B1_GAIN_MASK 0xF800 #define MADERA_EQ2_B1_GAIN_SHIFT 11 -#define MADERA_EQ2_B1_GAIN_WIDTH 5 #define MADERA_EQ2_B2_GAIN_MASK 0x07C0 #define MADERA_EQ2_B2_GAIN_SHIFT 6 -#define MADERA_EQ2_B2_GAIN_WIDTH 5 #define MADERA_EQ2_B3_GAIN_MASK 0x003E #define MADERA_EQ2_B3_GAIN_SHIFT 1 -#define MADERA_EQ2_B3_GAIN_WIDTH 5 #define MADERA_EQ2_ENA 0x0001 #define MADERA_EQ2_ENA_MASK 0x0001 #define MADERA_EQ2_ENA_SHIFT 0 -#define MADERA_EQ2_ENA_WIDTH 1 /* (0x0E27) EQ2_2 */ #define MADERA_EQ2_B4_GAIN_MASK 0xF800 #define MADERA_EQ2_B4_GAIN_SHIFT 11 -#define MADERA_EQ2_B4_GAIN_WIDTH 5 #define MADERA_EQ2_B5_GAIN_MASK 0x07C0 #define MADERA_EQ2_B5_GAIN_SHIFT 6 -#define MADERA_EQ2_B5_GAIN_WIDTH 5 #define MADERA_EQ2_B1_MODE 0x0001 #define MADERA_EQ2_B1_MODE_MASK 0x0001 #define MADERA_EQ2_B1_MODE_SHIFT 0 -#define MADERA_EQ2_B1_MODE_WIDTH 1 /* (0x0E3C) EQ3_1 */ #define MADERA_EQ3_B1_GAIN_MASK 0xF800 #define MADERA_EQ3_B1_GAIN_SHIFT 11 -#define MADERA_EQ3_B1_GAIN_WIDTH 5 #define MADERA_EQ3_B2_GAIN_MASK 0x07C0 #define MADERA_EQ3_B2_GAIN_SHIFT 6 -#define MADERA_EQ3_B2_GAIN_WIDTH 5 #define MADERA_EQ3_B3_GAIN_MASK 0x003E #define MADERA_EQ3_B3_GAIN_SHIFT 1 -#define MADERA_EQ3_B3_GAIN_WIDTH 5 #define MADERA_EQ3_ENA 0x0001 #define MADERA_EQ3_ENA_MASK 0x0001 #define MADERA_EQ3_ENA_SHIFT 0 -#define MADERA_EQ3_ENA_WIDTH 1 /* (0x0E3D) EQ3_2 */ #define MADERA_EQ3_B4_GAIN_MASK 0xF800 #define MADERA_EQ3_B4_GAIN_SHIFT 11 -#define MADERA_EQ3_B4_GAIN_WIDTH 5 #define MADERA_EQ3_B5_GAIN_MASK 0x07C0 #define MADERA_EQ3_B5_GAIN_SHIFT 6 -#define MADERA_EQ3_B5_GAIN_WIDTH 5 #define MADERA_EQ3_B1_MODE 0x0001 #define MADERA_EQ3_B1_MODE_MASK 0x0001 #define MADERA_EQ3_B1_MODE_SHIFT 0 -#define MADERA_EQ3_B1_MODE_WIDTH 1 /* (0x0E52) EQ4_1 */ #define MADERA_EQ4_B1_GAIN_MASK 0xF800 #define MADERA_EQ4_B1_GAIN_SHIFT 11 -#define MADERA_EQ4_B1_GAIN_WIDTH 5 #define MADERA_EQ4_B2_GAIN_MASK 0x07C0 #define MADERA_EQ4_B2_GAIN_SHIFT 6 -#define MADERA_EQ4_B2_GAIN_WIDTH 5 #define MADERA_EQ4_B3_GAIN_MASK 0x003E #define MADERA_EQ4_B3_GAIN_SHIFT 1 -#define MADERA_EQ4_B3_GAIN_WIDTH 5 #define MADERA_EQ4_ENA 0x0001 #define MADERA_EQ4_ENA_MASK 0x0001 #define MADERA_EQ4_ENA_SHIFT 0 -#define MADERA_EQ4_ENA_WIDTH 1 /* (0x0E53) EQ4_2 */ #define MADERA_EQ4_B4_GAIN_MASK 0xF800 #define MADERA_EQ4_B4_GAIN_SHIFT 11 -#define MADERA_EQ4_B4_GAIN_WIDTH 5 #define MADERA_EQ4_B5_GAIN_MASK 0x07C0 #define MADERA_EQ4_B5_GAIN_SHIFT 6 -#define MADERA_EQ4_B5_GAIN_WIDTH 5 #define MADERA_EQ4_B1_MODE 0x0001 #define MADERA_EQ4_B1_MODE_MASK 0x0001 #define MADERA_EQ4_B1_MODE_SHIFT 0 -#define MADERA_EQ4_B1_MODE_WIDTH 1 /* (0x0E80) DRC1_ctrl1 */ #define MADERA_DRC1L_ENA 0x0002 #define MADERA_DRC1L_ENA_MASK 0x0002 #define MADERA_DRC1L_ENA_SHIFT 1 -#define MADERA_DRC1L_ENA_WIDTH 1 #define MADERA_DRC1R_ENA 0x0001 #define MADERA_DRC1R_ENA_MASK 0x0001 #define MADERA_DRC1R_ENA_SHIFT 0 -#define MADERA_DRC1R_ENA_WIDTH 1 /* (0x0E88) DRC2_ctrl1 */ #define MADERA_DRC2L_ENA 0x0002 #define MADERA_DRC2L_ENA_MASK 0x0002 #define MADERA_DRC2L_ENA_SHIFT 1 -#define MADERA_DRC2L_ENA_WIDTH 1 #define MADERA_DRC2R_ENA 0x0001 #define MADERA_DRC2R_ENA_MASK 0x0001 #define MADERA_DRC2R_ENA_SHIFT 0 -#define MADERA_DRC2R_ENA_WIDTH 1 /* (0x0EC0) HPLPF1_1 */ #define MADERA_LHPF1_MODE 0x0002 #define MADERA_LHPF1_MODE_MASK 0x0002 #define MADERA_LHPF1_MODE_SHIFT 1 -#define MADERA_LHPF1_MODE_WIDTH 1 #define MADERA_LHPF1_ENA 0x0001 #define MADERA_LHPF1_ENA_MASK 0x0001 #define MADERA_LHPF1_ENA_SHIFT 0 -#define MADERA_LHPF1_ENA_WIDTH 1 /* (0x0EC1) HPLPF1_2 */ #define MADERA_LHPF1_COEFF_MASK 0xFFFF #define MADERA_LHPF1_COEFF_SHIFT 0 -#define MADERA_LHPF1_COEFF_WIDTH 16 /* (0x0EC4) HPLPF2_1 */ #define MADERA_LHPF2_MODE 0x0002 #define MADERA_LHPF2_MODE_MASK 0x0002 #define MADERA_LHPF2_MODE_SHIFT 1 -#define MADERA_LHPF2_MODE_WIDTH 1 #define MADERA_LHPF2_ENA 0x0001 #define MADERA_LHPF2_ENA_MASK 0x0001 #define MADERA_LHPF2_ENA_SHIFT 0 -#define MADERA_LHPF2_ENA_WIDTH 1 /* (0x0EC5) HPLPF2_2 */ #define MADERA_LHPF2_COEFF_MASK 0xFFFF #define MADERA_LHPF2_COEFF_SHIFT 0 -#define MADERA_LHPF2_COEFF_WIDTH 16 /* (0x0EC8) HPLPF3_1 */ #define MADERA_LHPF3_MODE 0x0002 #define MADERA_LHPF3_MODE_MASK 0x0002 #define MADERA_LHPF3_MODE_SHIFT 1 -#define MADERA_LHPF3_MODE_WIDTH 1 #define MADERA_LHPF3_ENA 0x0001 #define MADERA_LHPF3_ENA_MASK 0x0001 #define MADERA_LHPF3_ENA_SHIFT 0 -#define MADERA_LHPF3_ENA_WIDTH 1 /* (0x0EC9) HPLPF3_2 */ #define MADERA_LHPF3_COEFF_MASK 0xFFFF #define MADERA_LHPF3_COEFF_SHIFT 0 -#define MADERA_LHPF3_COEFF_WIDTH 16 /* (0x0ECC) HPLPF4_1 */ #define MADERA_LHPF4_MODE 0x0002 #define MADERA_LHPF4_MODE_MASK 0x0002 #define MADERA_LHPF4_MODE_SHIFT 1 -#define MADERA_LHPF4_MODE_WIDTH 1 #define MADERA_LHPF4_ENA 0x0001 #define MADERA_LHPF4_ENA_MASK 0x0001 #define MADERA_LHPF4_ENA_SHIFT 0 -#define MADERA_LHPF4_ENA_WIDTH 1 /* (0x0ECD) HPLPF4_2 */ #define MADERA_LHPF4_COEFF_MASK 0xFFFF #define MADERA_LHPF4_COEFF_SHIFT 0 -#define MADERA_LHPF4_COEFF_WIDTH 16 /* (0x0ED0) ASRC2_ENABLE */ #define MADERA_ASRC2_IN2L_ENA 0x0008 #define MADERA_ASRC2_IN2L_ENA_MASK 0x0008 #define MADERA_ASRC2_IN2L_ENA_SHIFT 3 -#define MADERA_ASRC2_IN2L_ENA_WIDTH 1 #define MADERA_ASRC2_IN2R_ENA 0x0004 #define MADERA_ASRC2_IN2R_ENA_MASK 0x0004 #define MADERA_ASRC2_IN2R_ENA_SHIFT 2 -#define MADERA_ASRC2_IN2R_ENA_WIDTH 1 #define MADERA_ASRC2_IN1L_ENA 0x0002 #define MADERA_ASRC2_IN1L_ENA_MASK 0x0002 #define MADERA_ASRC2_IN1L_ENA_SHIFT 1 -#define MADERA_ASRC2_IN1L_ENA_WIDTH 1 #define MADERA_ASRC2_IN1R_ENA 0x0001 #define MADERA_ASRC2_IN1R_ENA_MASK 0x0001 #define MADERA_ASRC2_IN1R_ENA_SHIFT 0 -#define MADERA_ASRC2_IN1R_ENA_WIDTH 1 /* (0x0ED2) ASRC2_RATE1 */ #define MADERA_ASRC2_RATE1_MASK 0xF800 #define MADERA_ASRC2_RATE1_SHIFT 11 -#define MADERA_ASRC2_RATE1_WIDTH 5 /* (0x0ED3) ASRC2_RATE2 */ #define MADERA_ASRC2_RATE2_MASK 0xF800 #define MADERA_ASRC2_RATE2_SHIFT 11 -#define MADERA_ASRC2_RATE2_WIDTH 5 /* (0x0EE0) ASRC1_ENABLE */ #define MADERA_ASRC1_IN2L_ENA 0x0008 #define MADERA_ASRC1_IN2L_ENA_MASK 0x0008 #define MADERA_ASRC1_IN2L_ENA_SHIFT 3 -#define MADERA_ASRC1_IN2L_ENA_WIDTH 1 #define MADERA_ASRC1_IN2R_ENA 0x0004 #define MADERA_ASRC1_IN2R_ENA_MASK 0x0004 #define MADERA_ASRC1_IN2R_ENA_SHIFT 2 -#define MADERA_ASRC1_IN2R_ENA_WIDTH 1 #define MADERA_ASRC1_IN1L_ENA 0x0002 #define MADERA_ASRC1_IN1L_ENA_MASK 0x0002 #define MADERA_ASRC1_IN1L_ENA_SHIFT 1 -#define MADERA_ASRC1_IN1L_ENA_WIDTH 1 #define MADERA_ASRC1_IN1R_ENA 0x0001 #define MADERA_ASRC1_IN1R_ENA_MASK 0x0001 #define MADERA_ASRC1_IN1R_ENA_SHIFT 0 -#define MADERA_ASRC1_IN1R_ENA_WIDTH 1 /* (0x0EE2) ASRC1_RATE1 */ #define MADERA_ASRC1_RATE1_MASK 0xF800 #define MADERA_ASRC1_RATE1_SHIFT 11 -#define MADERA_ASRC1_RATE1_WIDTH 5 /* (0x0EE3) ASRC1_RATE2 */ #define MADERA_ASRC1_RATE2_MASK 0xF800 #define MADERA_ASRC1_RATE2_SHIFT 11 -#define MADERA_ASRC1_RATE2_WIDTH 5 /* (0x0EF0) - ISRC1 CTRL 1 */ #define MADERA_ISRC1_FSH_MASK 0xF800 #define MADERA_ISRC1_FSH_SHIFT 11 -#define MADERA_ISRC1_FSH_WIDTH 5 #define MADERA_ISRC1_CLK_SEL_MASK 0x0700 #define MADERA_ISRC1_CLK_SEL_SHIFT 8 -#define MADERA_ISRC1_CLK_SEL_WIDTH 3 /* (0x0EF1) ISRC1_CTRL_2 */ #define MADERA_ISRC1_FSL_MASK 0xF800 #define MADERA_ISRC1_FSL_SHIFT 11 -#define MADERA_ISRC1_FSL_WIDTH 5 /* (0x0EF2) ISRC1_CTRL_3 */ #define MADERA_ISRC1_INT1_ENA 0x8000 #define MADERA_ISRC1_INT1_ENA_MASK 0x8000 #define MADERA_ISRC1_INT1_ENA_SHIFT 15 -#define MADERA_ISRC1_INT1_ENA_WIDTH 1 #define MADERA_ISRC1_INT2_ENA 0x4000 #define MADERA_ISRC1_INT2_ENA_MASK 0x4000 #define MADERA_ISRC1_INT2_ENA_SHIFT 14 -#define MADERA_ISRC1_INT2_ENA_WIDTH 1 #define MADERA_ISRC1_INT3_ENA 0x2000 #define MADERA_ISRC1_INT3_ENA_MASK 0x2000 #define MADERA_ISRC1_INT3_ENA_SHIFT 13 -#define MADERA_ISRC1_INT3_ENA_WIDTH 1 #define MADERA_ISRC1_INT4_ENA 0x1000 #define MADERA_ISRC1_INT4_ENA_MASK 0x1000 #define MADERA_ISRC1_INT4_ENA_SHIFT 12 -#define MADERA_ISRC1_INT4_ENA_WIDTH 1 #define MADERA_ISRC1_DEC1_ENA 0x0200 #define MADERA_ISRC1_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC1_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC1_DEC1_ENA_WIDTH 1 #define MADERA_ISRC1_DEC2_ENA 0x0100 #define MADERA_ISRC1_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC1_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC1_DEC2_ENA_WIDTH 1 #define MADERA_ISRC1_DEC3_ENA 0x0080 #define MADERA_ISRC1_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC1_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC1_DEC3_ENA_WIDTH 1 #define MADERA_ISRC1_DEC4_ENA 0x0040 #define MADERA_ISRC1_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC1_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC1_DEC4_ENA_WIDTH 1 #define MADERA_ISRC1_NOTCH_ENA 0x0001 #define MADERA_ISRC1_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC1_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC1_NOTCH_ENA_WIDTH 1 /* (0x0EF3) ISRC2_CTRL_1 */ #define MADERA_ISRC2_FSH_MASK 0xF800 #define MADERA_ISRC2_FSH_SHIFT 11 -#define MADERA_ISRC2_FSH_WIDTH 5 #define MADERA_ISRC2_CLK_SEL_MASK 0x0700 #define MADERA_ISRC2_CLK_SEL_SHIFT 8 -#define MADERA_ISRC2_CLK_SEL_WIDTH 3 /* (0x0EF4) ISRC2_CTRL_2 */ #define MADERA_ISRC2_FSL_MASK 0xF800 #define MADERA_ISRC2_FSL_SHIFT 11 -#define MADERA_ISRC2_FSL_WIDTH 5 /* (0x0EF5) ISRC2_CTRL_3 */ #define MADERA_ISRC2_INT1_ENA 0x8000 #define MADERA_ISRC2_INT1_ENA_MASK 0x8000 #define MADERA_ISRC2_INT1_ENA_SHIFT 15 -#define MADERA_ISRC2_INT1_ENA_WIDTH 1 #define MADERA_ISRC2_INT2_ENA 0x4000 #define MADERA_ISRC2_INT2_ENA_MASK 0x4000 #define MADERA_ISRC2_INT2_ENA_SHIFT 14 -#define MADERA_ISRC2_INT2_ENA_WIDTH 1 #define MADERA_ISRC2_INT3_ENA 0x2000 #define MADERA_ISRC2_INT3_ENA_MASK 0x2000 #define MADERA_ISRC2_INT3_ENA_SHIFT 13 -#define MADERA_ISRC2_INT3_ENA_WIDTH 1 #define MADERA_ISRC2_INT4_ENA 0x1000 #define MADERA_ISRC2_INT4_ENA_MASK 0x1000 #define MADERA_ISRC2_INT4_ENA_SHIFT 12 -#define MADERA_ISRC2_INT4_ENA_WIDTH 1 #define MADERA_ISRC2_DEC1_ENA 0x0200 #define MADERA_ISRC2_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC2_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC2_DEC1_ENA_WIDTH 1 #define MADERA_ISRC2_DEC2_ENA 0x0100 #define MADERA_ISRC2_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC2_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC2_DEC2_ENA_WIDTH 1 #define MADERA_ISRC2_DEC3_ENA 0x0080 #define MADERA_ISRC2_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC2_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC2_DEC3_ENA_WIDTH 1 #define MADERA_ISRC2_DEC4_ENA 0x0040 #define MADERA_ISRC2_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC2_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC2_DEC4_ENA_WIDTH 1 #define MADERA_ISRC2_NOTCH_ENA 0x0001 #define MADERA_ISRC2_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC2_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC2_NOTCH_ENA_WIDTH 1 /* (0x0EF6) ISRC3_CTRL_1 */ #define MADERA_ISRC3_FSH_MASK 0xF800 #define MADERA_ISRC3_FSH_SHIFT 11 -#define MADERA_ISRC3_FSH_WIDTH 5 #define MADERA_ISRC3_CLK_SEL_MASK 0x0700 #define MADERA_ISRC3_CLK_SEL_SHIFT 8 -#define MADERA_ISRC3_CLK_SEL_WIDTH 3 /* (0x0EF7) ISRC3_CTRL_2 */ #define MADERA_ISRC3_FSL_MASK 0xF800 #define MADERA_ISRC3_FSL_SHIFT 11 -#define MADERA_ISRC3_FSL_WIDTH 5 /* (0x0EF8) ISRC3_CTRL_3 */ #define MADERA_ISRC3_INT1_ENA 0x8000 #define MADERA_ISRC3_INT1_ENA_MASK 0x8000 #define MADERA_ISRC3_INT1_ENA_SHIFT 15 -#define MADERA_ISRC3_INT1_ENA_WIDTH 1 #define MADERA_ISRC3_INT2_ENA 0x4000 #define MADERA_ISRC3_INT2_ENA_MASK 0x4000 #define MADERA_ISRC3_INT2_ENA_SHIFT 14 -#define MADERA_ISRC3_INT2_ENA_WIDTH 1 #define MADERA_ISRC3_INT3_ENA 0x2000 #define MADERA_ISRC3_INT3_ENA_MASK 0x2000 #define MADERA_ISRC3_INT3_ENA_SHIFT 13 -#define MADERA_ISRC3_INT3_ENA_WIDTH 1 #define MADERA_ISRC3_INT4_ENA 0x1000 #define MADERA_ISRC3_INT4_ENA_MASK 0x1000 #define MADERA_ISRC3_INT4_ENA_SHIFT 12 -#define MADERA_ISRC3_INT4_ENA_WIDTH 1 #define MADERA_ISRC3_DEC1_ENA 0x0200 #define MADERA_ISRC3_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC3_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC3_DEC1_ENA_WIDTH 1 #define MADERA_ISRC3_DEC2_ENA 0x0100 #define MADERA_ISRC3_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC3_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC3_DEC2_ENA_WIDTH 1 #define MADERA_ISRC3_DEC3_ENA 0x0080 #define MADERA_ISRC3_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC3_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC3_DEC3_ENA_WIDTH 1 #define MADERA_ISRC3_DEC4_ENA 0x0040 #define MADERA_ISRC3_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC3_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC3_DEC4_ENA_WIDTH 1 #define MADERA_ISRC3_NOTCH_ENA 0x0001 #define MADERA_ISRC3_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC3_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC3_NOTCH_ENA_WIDTH 1 /* (0x0EF9) ISRC4_CTRL_1 */ #define MADERA_ISRC4_FSH_MASK 0xF800 #define MADERA_ISRC4_FSH_SHIFT 11 -#define MADERA_ISRC4_FSH_WIDTH 5 #define MADERA_ISRC4_CLK_SEL_MASK 0x0700 #define MADERA_ISRC4_CLK_SEL_SHIFT 8 -#define MADERA_ISRC4_CLK_SEL_WIDTH 3 /* (0x0EFA) ISRC4_CTRL_2 */ #define MADERA_ISRC4_FSL_MASK 0xF800 #define MADERA_ISRC4_FSL_SHIFT 11 -#define MADERA_ISRC4_FSL_WIDTH 5 /* (0x0EFB) ISRC4_CTRL_3 */ #define MADERA_ISRC4_INT1_ENA 0x8000 #define MADERA_ISRC4_INT1_ENA_MASK 0x8000 #define MADERA_ISRC4_INT1_ENA_SHIFT 15 -#define MADERA_ISRC4_INT1_ENA_WIDTH 1 #define MADERA_ISRC4_INT2_ENA 0x4000 #define MADERA_ISRC4_INT2_ENA_MASK 0x4000 #define MADERA_ISRC4_INT2_ENA_SHIFT 14 -#define MADERA_ISRC4_INT2_ENA_WIDTH 1 #define MADERA_ISRC4_INT3_ENA 0x2000 #define MADERA_ISRC4_INT3_ENA_MASK 0x2000 #define MADERA_ISRC4_INT3_ENA_SHIFT 13 -#define MADERA_ISRC4_INT3_ENA_WIDTH 1 #define MADERA_ISRC4_INT4_ENA 0x1000 #define MADERA_ISRC4_INT4_ENA_MASK 0x1000 #define MADERA_ISRC4_INT4_ENA_SHIFT 12 -#define MADERA_ISRC4_INT4_ENA_WIDTH 1 #define MADERA_ISRC4_DEC1_ENA 0x0200 #define MADERA_ISRC4_DEC1_ENA_MASK 0x0200 #define MADERA_ISRC4_DEC1_ENA_SHIFT 9 -#define MADERA_ISRC4_DEC1_ENA_WIDTH 1 #define MADERA_ISRC4_DEC2_ENA 0x0100 #define MADERA_ISRC4_DEC2_ENA_MASK 0x0100 #define MADERA_ISRC4_DEC2_ENA_SHIFT 8 -#define MADERA_ISRC4_DEC2_ENA_WIDTH 1 #define MADERA_ISRC4_DEC3_ENA 0x0080 #define MADERA_ISRC4_DEC3_ENA_MASK 0x0080 #define MADERA_ISRC4_DEC3_ENA_SHIFT 7 -#define MADERA_ISRC4_DEC3_ENA_WIDTH 1 #define MADERA_ISRC4_DEC4_ENA 0x0040 #define MADERA_ISRC4_DEC4_ENA_MASK 0x0040 #define MADERA_ISRC4_DEC4_ENA_SHIFT 6 -#define MADERA_ISRC4_DEC4_ENA_WIDTH 1 #define MADERA_ISRC4_NOTCH_ENA 0x0001 #define MADERA_ISRC4_NOTCH_ENA_MASK 0x0001 #define MADERA_ISRC4_NOTCH_ENA_SHIFT 0 -#define MADERA_ISRC4_NOTCH_ENA_WIDTH 1 /* (0x0F00) Clock_Control */ #define MADERA_EXT_NG_SEL_CLR 0x0080 #define MADERA_EXT_NG_SEL_CLR_MASK 0x0080 #define MADERA_EXT_NG_SEL_CLR_SHIFT 7 -#define MADERA_EXT_NG_SEL_CLR_WIDTH 1 #define MADERA_EXT_NG_SEL_SET 0x0040 #define MADERA_EXT_NG_SEL_SET_MASK 0x0040 #define MADERA_EXT_NG_SEL_SET_SHIFT 6 -#define MADERA_EXT_NG_SEL_SET_WIDTH 1 #define MADERA_CLK_R_ENA_CLR 0x0020 #define MADERA_CLK_R_ENA_CLR_MASK 0x0020 #define MADERA_CLK_R_ENA_CLR_SHIFT 5 -#define MADERA_CLK_R_ENA_CLR_WIDTH 1 #define MADERA_CLK_R_ENA_SET 0x0010 #define MADERA_CLK_R_ENA_SET_MASK 0x0010 #define MADERA_CLK_R_ENA_SET_SHIFT 4 -#define MADERA_CLK_R_ENA_SET_WIDTH 1 #define MADERA_CLK_NG_ENA_CLR 0x0008 #define MADERA_CLK_NG_ENA_CLR_MASK 0x0008 #define MADERA_CLK_NG_ENA_CLR_SHIFT 3 -#define MADERA_CLK_NG_ENA_CLR_WIDTH 1 #define MADERA_CLK_NG_ENA_SET 0x0004 #define MADERA_CLK_NG_ENA_SET_MASK 0x0004 #define MADERA_CLK_NG_ENA_SET_SHIFT 2 -#define MADERA_CLK_NG_ENA_SET_WIDTH 1 #define MADERA_CLK_L_ENA_CLR 0x0002 #define MADERA_CLK_L_ENA_CLR_MASK 0x0002 #define MADERA_CLK_L_ENA_CLR_SHIFT 1 -#define MADERA_CLK_L_ENA_CLR_WIDTH 1 #define MADERA_CLK_L_ENA_SET 0x0001 #define MADERA_CLK_L_ENA_SET_MASK 0x0001 #define MADERA_CLK_L_ENA_SET_SHIFT 0 -#define MADERA_CLK_L_ENA_SET_WIDTH 1 /* (0x0F01) ANC_SRC */ #define MADERA_IN_RXANCR_SEL_MASK 0x0070 #define MADERA_IN_RXANCR_SEL_SHIFT 4 -#define MADERA_IN_RXANCR_SEL_WIDTH 3 #define MADERA_IN_RXANCL_SEL_MASK 0x0007 #define MADERA_IN_RXANCL_SEL_SHIFT 0 -#define MADERA_IN_RXANCL_SEL_WIDTH 3 /* (0x0F17) FCL_ADC_reformatter_control */ #define MADERA_FCL_MIC_MODE_SEL 0x000C #define MADERA_FCL_MIC_MODE_SEL_SHIFT 2 -#define MADERA_FCL_MIC_MODE_SEL_WIDTH 2 /* (0x0F73) FCR_ADC_reformatter_control */ #define MADERA_FCR_MIC_MODE_SEL 0x000C #define MADERA_FCR_MIC_MODE_SEL_SHIFT 2 -#define MADERA_FCR_MIC_MODE_SEL_WIDTH 2 /* (0x10C0) AUXPDM1_CTRL_0 */ #define MADERA_AUXPDM1_SRC_MASK 0x0F00 #define MADERA_AUXPDM1_SRC_SHIFT 8 -#define MADERA_AUXPDM1_SRC_WIDTH 4 #define MADERA_AUXPDM1_TXEDGE_MASK 0x0010 #define MADERA_AUXPDM1_TXEDGE_SHIFT 4 -#define MADERA_AUXPDM1_TXEDGE_WIDTH 1 #define MADERA_AUXPDM1_MSTR_MASK 0x0008 #define MADERA_AUXPDM1_MSTR_SHIFT 3 -#define MADERA_AUXPDM1_MSTR_WIDTH 1 #define MADERA_AUXPDM1_ENABLE_MASK 0x0001 #define MADERA_AUXPDM1_ENABLE_SHIFT 0 -#define MADERA_AUXPDM1_ENABLE_WIDTH 1 /* (0x10C1) AUXPDM1_CTRL_1 */ #define MADERA_AUXPDM1_CLK_FREQ_MASK 0xC000 #define MADERA_AUXPDM1_CLK_FREQ_SHIFT 14 -#define MADERA_AUXPDM1_CLK_FREQ_WIDTH 2 /* (0x1480) DFC1_CTRL_W0 */ #define MADERA_DFC1_RATE_MASK 0x007C #define MADERA_DFC1_RATE_SHIFT 2 -#define MADERA_DFC1_RATE_WIDTH 5 #define MADERA_DFC1_DITH_ENA 0x0002 #define MADERA_DFC1_DITH_ENA_MASK 0x0002 #define MADERA_DFC1_DITH_ENA_SHIFT 1 -#define MADERA_DFC1_DITH_ENA_WIDTH 1 #define MADERA_DFC1_ENA 0x0001 #define MADERA_DFC1_ENA_MASK 0x0001 #define MADERA_DFC1_ENA_SHIFT 0 -#define MADERA_DFC1_ENA_WIDTH 1 /* (0x1482) DFC1_RX_W0 */ #define MADERA_DFC1_RX_DATA_WIDTH_MASK 0x1F00 #define MADERA_DFC1_RX_DATA_WIDTH_SHIFT 8 -#define MADERA_DFC1_RX_DATA_WIDTH_WIDTH 5 #define MADERA_DFC1_RX_DATA_TYPE_MASK 0x0007 #define MADERA_DFC1_RX_DATA_TYPE_SHIFT 0 -#define MADERA_DFC1_RX_DATA_TYPE_WIDTH 3 /* (0x1484) DFC1_TX_W0 */ #define MADERA_DFC1_TX_DATA_WIDTH_MASK 0x1F00 #define MADERA_DFC1_TX_DATA_WIDTH_SHIFT 8 -#define MADERA_DFC1_TX_DATA_WIDTH_WIDTH 5 #define MADERA_DFC1_TX_DATA_TYPE_MASK 0x0007 #define MADERA_DFC1_TX_DATA_TYPE_SHIFT 0 -#define MADERA_DFC1_TX_DATA_TYPE_WIDTH 3 /* (0x1600) ADSP2_IRQ0 */ #define MADERA_DSP_IRQ2 0x0002 @@ -3636,449 +3103,347 @@ #define MADERA_GP1_LVL 0x8000 #define MADERA_GP1_LVL_MASK 0x8000 #define MADERA_GP1_LVL_SHIFT 15 -#define MADERA_GP1_LVL_WIDTH 1 #define MADERA_GP1_OP_CFG 0x4000 #define MADERA_GP1_OP_CFG_MASK 0x4000 #define MADERA_GP1_OP_CFG_SHIFT 14 -#define MADERA_GP1_OP_CFG_WIDTH 1 #define MADERA_GP1_DB 0x2000 #define MADERA_GP1_DB_MASK 0x2000 #define MADERA_GP1_DB_SHIFT 13 -#define MADERA_GP1_DB_WIDTH 1 #define MADERA_GP1_POL 0x1000 #define MADERA_GP1_POL_MASK 0x1000 #define MADERA_GP1_POL_SHIFT 12 -#define MADERA_GP1_POL_WIDTH 1 #define MADERA_GP1_IP_CFG 0x0800 #define MADERA_GP1_IP_CFG_MASK 0x0800 #define MADERA_GP1_IP_CFG_SHIFT 11 -#define MADERA_GP1_IP_CFG_WIDTH 1 #define MADERA_GP1_FN_MASK 0x03FF #define MADERA_GP1_FN_SHIFT 0 -#define MADERA_GP1_FN_WIDTH 10 /* (0x1701) GPIO1_CTRL_2 */ #define MADERA_GP1_DIR 0x8000 #define MADERA_GP1_DIR_MASK 0x8000 #define MADERA_GP1_DIR_SHIFT 15 -#define MADERA_GP1_DIR_WIDTH 1 #define MADERA_GP1_PU 0x4000 #define MADERA_GP1_PU_MASK 0x4000 #define MADERA_GP1_PU_SHIFT 14 -#define MADERA_GP1_PU_WIDTH 1 #define MADERA_GP1_PD 0x2000 #define MADERA_GP1_PD_MASK 0x2000 #define MADERA_GP1_PD_SHIFT 13 -#define MADERA_GP1_PD_WIDTH 1 #define MADERA_GP1_DRV_STR_MASK 0x1800 #define MADERA_GP1_DRV_STR_SHIFT 11 -#define MADERA_GP1_DRV_STR_WIDTH 2 /* (0x1800) IRQ1_Status_1 */ #define MADERA_CTRLIF_ERR_EINT1 0x1000 #define MADERA_CTRLIF_ERR_EINT1_MASK 0x1000 #define MADERA_CTRLIF_ERR_EINT1_SHIFT 12 -#define MADERA_CTRLIF_ERR_EINT1_WIDTH 1 #define MADERA_SYSCLK_FAIL_EINT1 0x0200 #define MADERA_SYSCLK_FAIL_EINT1_MASK 0x0200 #define MADERA_SYSCLK_FAIL_EINT1_SHIFT 9 -#define MADERA_SYSCLK_FAIL_EINT1_WIDTH 1 #define MADERA_CLOCK_DETECT_EINT1 0x0100 #define MADERA_CLOCK_DETECT_EINT1_MASK 0x0100 #define MADERA_CLOCK_DETECT_EINT1_SHIFT 8 -#define MADERA_CLOCK_DETECT_EINT1_WIDTH 1 #define MADERA_BOOT_DONE_EINT1 0x0080 #define MADERA_BOOT_DONE_EINT1_MASK 0x0080 #define MADERA_BOOT_DONE_EINT1_SHIFT 7 -#define MADERA_BOOT_DONE_EINT1_WIDTH 1 /* (0x1801) IRQ1_Status_2 */ #define MADERA_FLLAO_LOCK_EINT1 0x0800 #define MADERA_FLLAO_LOCK_EINT1_MASK 0x0800 #define MADERA_FLLAO_LOCK_EINT1_SHIFT 11 -#define MADERA_FLLAO_LOCK_EINT1_WIDTH 1 #define MADERA_FLL3_LOCK_EINT1 0x0400 #define MADERA_FLL3_LOCK_EINT1_MASK 0x0400 #define MADERA_FLL3_LOCK_EINT1_SHIFT 10 -#define MADERA_FLL3_LOCK_EINT1_WIDTH 1 #define MADERA_FLL2_LOCK_EINT1 0x0200 #define MADERA_FLL2_LOCK_EINT1_MASK 0x0200 #define MADERA_FLL2_LOCK_EINT1_SHIFT 9 -#define MADERA_FLL2_LOCK_EINT1_WIDTH 1 #define MADERA_FLL1_LOCK_EINT1 0x0100 #define MADERA_FLL1_LOCK_EINT1_MASK 0x0100 #define MADERA_FLL1_LOCK_EINT1_SHIFT 8 -#define MADERA_FLL1_LOCK_EINT1_WIDTH 1 /* (0x1805) IRQ1_Status_6 */ #define MADERA_MICDET2_EINT1 0x0200 #define MADERA_MICDET2_EINT1_MASK 0x0200 #define MADERA_MICDET2_EINT1_SHIFT 9 -#define MADERA_MICDET2_EINT1_WIDTH 1 #define MADERA_MICDET1_EINT1 0x0100 #define MADERA_MICDET1_EINT1_MASK 0x0100 #define MADERA_MICDET1_EINT1_SHIFT 8 -#define MADERA_MICDET1_EINT1_WIDTH 1 #define MADERA_HPDET_EINT1 0x0001 #define MADERA_HPDET_EINT1_MASK 0x0001 #define MADERA_HPDET_EINT1_SHIFT 0 -#define MADERA_HPDET_EINT1_WIDTH 1 /* (0x1806) IRQ1_Status_7 */ #define MADERA_MICD_CLAMP_FALL_EINT1 0x0020 #define MADERA_MICD_CLAMP_FALL_EINT1_MASK 0x0020 #define MADERA_MICD_CLAMP_FALL_EINT1_SHIFT 5 -#define MADERA_MICD_CLAMP_FALL_EINT1_WIDTH 1 #define MADERA_MICD_CLAMP_RISE_EINT1 0x0010 #define MADERA_MICD_CLAMP_RISE_EINT1_MASK 0x0010 #define MADERA_MICD_CLAMP_RISE_EINT1_SHIFT 4 -#define MADERA_MICD_CLAMP_RISE_EINT1_WIDTH 1 #define MADERA_JD2_FALL_EINT1 0x0008 #define MADERA_JD2_FALL_EINT1_MASK 0x0008 #define MADERA_JD2_FALL_EINT1_SHIFT 3 -#define MADERA_JD2_FALL_EINT1_WIDTH 1 #define MADERA_JD2_RISE_EINT1 0x0004 #define MADERA_JD2_RISE_EINT1_MASK 0x0004 #define MADERA_JD2_RISE_EINT1_SHIFT 2 -#define MADERA_JD2_RISE_EINT1_WIDTH 1 #define MADERA_JD1_FALL_EINT1 0x0002 #define MADERA_JD1_FALL_EINT1_MASK 0x0002 #define MADERA_JD1_FALL_EINT1_SHIFT 1 -#define MADERA_JD1_FALL_EINT1_WIDTH 1 #define MADERA_JD1_RISE_EINT1 0x0001 #define MADERA_JD1_RISE_EINT1_MASK 0x0001 #define MADERA_JD1_RISE_EINT1_SHIFT 0 -#define MADERA_JD1_RISE_EINT1_WIDTH 1 /* (0x1808) IRQ1_Status_9 */ #define MADERA_ASRC2_IN2_LOCK_EINT1 0x0800 #define MADERA_ASRC2_IN2_LOCK_EINT1_MASK 0x0800 #define MADERA_ASRC2_IN2_LOCK_EINT1_SHIFT 11 -#define MADERA_ASRC2_IN2_LOCK_EINT1_WIDTH 1 #define MADERA_ASRC2_IN1_LOCK_EINT1 0x0400 #define MADERA_ASRC2_IN1_LOCK_EINT1_MASK 0x0400 #define MADERA_ASRC2_IN1_LOCK_EINT1_SHIFT 10 -#define MADERA_ASRC2_IN1_LOCK_EINT1_WIDTH 1 #define MADERA_ASRC1_IN2_LOCK_EINT1 0x0200 #define MADERA_ASRC1_IN2_LOCK_EINT1_MASK 0x0200 #define MADERA_ASRC1_IN2_LOCK_EINT1_SHIFT 9 -#define MADERA_ASRC1_IN2_LOCK_EINT1_WIDTH 1 #define MADERA_ASRC1_IN1_LOCK_EINT1 0x0100 #define MADERA_ASRC1_IN1_LOCK_EINT1_MASK 0x0100 #define MADERA_ASRC1_IN1_LOCK_EINT1_SHIFT 8 -#define MADERA_ASRC1_IN1_LOCK_EINT1_WIDTH 1 #define MADERA_DRC2_SIG_DET_EINT1 0x0002 #define MADERA_DRC2_SIG_DET_EINT1_MASK 0x0002 #define MADERA_DRC2_SIG_DET_EINT1_SHIFT 1 -#define MADERA_DRC2_SIG_DET_EINT1_WIDTH 1 #define MADERA_DRC1_SIG_DET_EINT1 0x0001 #define MADERA_DRC1_SIG_DET_EINT1_MASK 0x0001 #define MADERA_DRC1_SIG_DET_EINT1_SHIFT 0 -#define MADERA_DRC1_SIG_DET_EINT1_WIDTH 1 /* (0x180A) IRQ1_Status_11 */ #define MADERA_DSP_IRQ16_EINT1 0x8000 #define MADERA_DSP_IRQ16_EINT1_MASK 0x8000 #define MADERA_DSP_IRQ16_EINT1_SHIFT 15 -#define MADERA_DSP_IRQ16_EINT1_WIDTH 1 #define MADERA_DSP_IRQ15_EINT1 0x4000 #define MADERA_DSP_IRQ15_EINT1_MASK 0x4000 #define MADERA_DSP_IRQ15_EINT1_SHIFT 14 -#define MADERA_DSP_IRQ15_EINT1_WIDTH 1 #define MADERA_DSP_IRQ14_EINT1 0x2000 #define MADERA_DSP_IRQ14_EINT1_MASK 0x2000 #define MADERA_DSP_IRQ14_EINT1_SHIFT 13 -#define MADERA_DSP_IRQ14_EINT1_WIDTH 1 #define MADERA_DSP_IRQ13_EINT1 0x1000 #define MADERA_DSP_IRQ13_EINT1_MASK 0x1000 #define MADERA_DSP_IRQ13_EINT1_SHIFT 12 -#define MADERA_DSP_IRQ13_EINT1_WIDTH 1 #define MADERA_DSP_IRQ12_EINT1 0x0800 #define MADERA_DSP_IRQ12_EINT1_MASK 0x0800 #define MADERA_DSP_IRQ12_EINT1_SHIFT 11 -#define MADERA_DSP_IRQ12_EINT1_WIDTH 1 #define MADERA_DSP_IRQ11_EINT1 0x0400 #define MADERA_DSP_IRQ11_EINT1_MASK 0x0400 #define MADERA_DSP_IRQ11_EINT1_SHIFT 10 -#define MADERA_DSP_IRQ11_EINT1_WIDTH 1 #define MADERA_DSP_IRQ10_EINT1 0x0200 #define MADERA_DSP_IRQ10_EINT1_MASK 0x0200 #define MADERA_DSP_IRQ10_EINT1_SHIFT 9 -#define MADERA_DSP_IRQ10_EINT1_WIDTH 1 #define MADERA_DSP_IRQ9_EINT1 0x0100 #define MADERA_DSP_IRQ9_EINT1_MASK 0x0100 #define MADERA_DSP_IRQ9_EINT1_SHIFT 8 -#define MADERA_DSP_IRQ9_EINT1_WIDTH 1 #define MADERA_DSP_IRQ8_EINT1 0x0080 #define MADERA_DSP_IRQ8_EINT1_MASK 0x0080 #define MADERA_DSP_IRQ8_EINT1_SHIFT 7 -#define MADERA_DSP_IRQ8_EINT1_WIDTH 1 #define MADERA_DSP_IRQ7_EINT1 0x0040 #define MADERA_DSP_IRQ7_EINT1_MASK 0x0040 #define MADERA_DSP_IRQ7_EINT1_SHIFT 6 -#define MADERA_DSP_IRQ7_EINT1_WIDTH 1 #define MADERA_DSP_IRQ6_EINT1 0x0020 #define MADERA_DSP_IRQ6_EINT1_MASK 0x0020 #define MADERA_DSP_IRQ6_EINT1_SHIFT 5 -#define MADERA_DSP_IRQ6_EINT1_WIDTH 1 #define MADERA_DSP_IRQ5_EINT1 0x0010 #define MADERA_DSP_IRQ5_EINT1_MASK 0x0010 #define MADERA_DSP_IRQ5_EINT1_SHIFT 4 -#define MADERA_DSP_IRQ5_EINT1_WIDTH 1 #define MADERA_DSP_IRQ4_EINT1 0x0008 #define MADERA_DSP_IRQ4_EINT1_MASK 0x0008 #define MADERA_DSP_IRQ4_EINT1_SHIFT 3 -#define MADERA_DSP_IRQ4_EINT1_WIDTH 1 #define MADERA_DSP_IRQ3_EINT1 0x0004 #define MADERA_DSP_IRQ3_EINT1_MASK 0x0004 #define MADERA_DSP_IRQ3_EINT1_SHIFT 2 -#define MADERA_DSP_IRQ3_EINT1_WIDTH 1 #define MADERA_DSP_IRQ2_EINT1 0x0002 #define MADERA_DSP_IRQ2_EINT1_MASK 0x0002 #define MADERA_DSP_IRQ2_EINT1_SHIFT 1 -#define MADERA_DSP_IRQ2_EINT1_WIDTH 1 #define MADERA_DSP_IRQ1_EINT1 0x0001 #define MADERA_DSP_IRQ1_EINT1_MASK 0x0001 #define MADERA_DSP_IRQ1_EINT1_SHIFT 0 -#define MADERA_DSP_IRQ1_EINT1_WIDTH 1 /* (0x180B) IRQ1_Status_12 */ #define MADERA_SPKOUTR_SC_EINT1 0x0080 #define MADERA_SPKOUTR_SC_EINT1_MASK 0x0080 #define MADERA_SPKOUTR_SC_EINT1_SHIFT 7 -#define MADERA_SPKOUTR_SC_EINT1_WIDTH 1 #define MADERA_SPKOUTL_SC_EINT1 0x0040 #define MADERA_SPKOUTL_SC_EINT1_MASK 0x0040 #define MADERA_SPKOUTL_SC_EINT1_SHIFT 6 -#define MADERA_SPKOUTL_SC_EINT1_WIDTH 1 #define MADERA_HP3R_SC_EINT1 0x0020 #define MADERA_HP3R_SC_EINT1_MASK 0x0020 #define MADERA_HP3R_SC_EINT1_SHIFT 5 -#define MADERA_HP3R_SC_EINT1_WIDTH 1 #define MADERA_HP3L_SC_EINT1 0x0010 #define MADERA_HP3L_SC_EINT1_MASK 0x0010 #define MADERA_HP3L_SC_EINT1_SHIFT 4 -#define MADERA_HP3L_SC_EINT1_WIDTH 1 #define MADERA_HP2R_SC_EINT1 0x0008 #define MADERA_HP2R_SC_EINT1_MASK 0x0008 #define MADERA_HP2R_SC_EINT1_SHIFT 3 -#define MADERA_HP2R_SC_EINT1_WIDTH 1 #define MADERA_HP2L_SC_EINT1 0x0004 #define MADERA_HP2L_SC_EINT1_MASK 0x0004 #define MADERA_HP2L_SC_EINT1_SHIFT 2 -#define MADERA_HP2L_SC_EINT1_WIDTH 1 #define MADERA_HP1R_SC_EINT1 0x0002 #define MADERA_HP1R_SC_EINT1_MASK 0x0002 #define MADERA_HP1R_SC_EINT1_SHIFT 1 -#define MADERA_HP1R_SC_EINT1_WIDTH 1 #define MADERA_HP1L_SC_EINT1 0x0001 #define MADERA_HP1L_SC_EINT1_MASK 0x0001 #define MADERA_HP1L_SC_EINT1_SHIFT 0 -#define MADERA_HP1L_SC_EINT1_WIDTH 1 /* (0x180E) IRQ1_Status_15 */ #define MADERA_SPK_OVERHEAT_WARN_EINT1 0x0004 #define MADERA_SPK_OVERHEAT_WARN_EINT1_MASK 0x0004 #define MADERA_SPK_OVERHEAT_WARN_EINT1_SHIFT 2 -#define MADERA_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 #define MADERA_SPK_OVERHEAT_EINT1 0x0002 #define MADERA_SPK_OVERHEAT_EINT1_MASK 0x0002 #define MADERA_SPK_OVERHEAT_EINT1_SHIFT 1 -#define MADERA_SPK_OVERHEAT_EINT1_WIDTH 1 #define MADERA_SPK_SHUTDOWN_EINT1 0x0001 #define MADERA_SPK_SHUTDOWN_EINT1_MASK 0x0001 #define MADERA_SPK_SHUTDOWN_EINT1_SHIFT 0 -#define MADERA_SPK_SHUTDOWN_EINT1_WIDTH 1 /* (0x1820) - IRQ1 Status 33 */ #define MADERA_DSP7_BUS_ERR_EINT1 0x0040 #define MADERA_DSP7_BUS_ERR_EINT1_MASK 0x0040 #define MADERA_DSP7_BUS_ERR_EINT1_SHIFT 6 -#define MADERA_DSP7_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP6_BUS_ERR_EINT1 0x0020 #define MADERA_DSP6_BUS_ERR_EINT1_MASK 0x0020 #define MADERA_DSP6_BUS_ERR_EINT1_SHIFT 5 -#define MADERA_DSP6_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP5_BUS_ERR_EINT1 0x0010 #define MADERA_DSP5_BUS_ERR_EINT1_MASK 0x0010 #define MADERA_DSP5_BUS_ERR_EINT1_SHIFT 4 -#define MADERA_DSP5_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP4_BUS_ERR_EINT1 0x0008 #define MADERA_DSP4_BUS_ERR_EINT1_MASK 0x0008 #define MADERA_DSP4_BUS_ERR_EINT1_SHIFT 3 -#define MADERA_DSP4_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP3_BUS_ERR_EINT1 0x0004 #define MADERA_DSP3_BUS_ERR_EINT1_MASK 0x0004 #define MADERA_DSP3_BUS_ERR_EINT1_SHIFT 2 -#define MADERA_DSP3_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP2_BUS_ERR_EINT1 0x0002 #define MADERA_DSP2_BUS_ERR_EINT1_MASK 0x0002 #define MADERA_DSP2_BUS_ERR_EINT1_SHIFT 1 -#define MADERA_DSP2_BUS_ERR_EINT1_WIDTH 1 #define MADERA_DSP1_BUS_ERR_EINT1 0x0001 #define MADERA_DSP1_BUS_ERR_EINT1_MASK 0x0001 #define MADERA_DSP1_BUS_ERR_EINT1_SHIFT 0 -#define MADERA_DSP1_BUS_ERR_EINT1_WIDTH 1 /* (0x1845) IRQ1_Mask_6 */ #define MADERA_IM_MICDET2_EINT1 0x0200 #define MADERA_IM_MICDET2_EINT1_MASK 0x0200 #define MADERA_IM_MICDET2_EINT1_SHIFT 9 -#define MADERA_IM_MICDET2_EINT1_WIDTH 1 #define MADERA_IM_MICDET1_EINT1 0x0100 #define MADERA_IM_MICDET1_EINT1_MASK 0x0100 #define MADERA_IM_MICDET1_EINT1_SHIFT 8 -#define MADERA_IM_MICDET1_EINT1_WIDTH 1 #define MADERA_IM_HPDET_EINT1 0x0001 #define MADERA_IM_HPDET_EINT1_MASK 0x0001 #define MADERA_IM_HPDET_EINT1_SHIFT 0 -#define MADERA_IM_HPDET_EINT1_WIDTH 1 /* (0x184E) IRQ1_Mask_15 */ #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1 0x0004 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_MASK 0x0004 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_SHIFT 2 -#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 #define MADERA_IM_SPK_OVERHEAT_EINT1 0x0002 #define MADERA_IM_SPK_OVERHEAT_EINT1_MASK 0x0002 #define MADERA_IM_SPK_OVERHEAT_EINT1_SHIFT 1 -#define MADERA_IM_SPK_OVERHEAT_EINT1_WIDTH 1 #define MADERA_IM_SPK_SHUTDOWN_EINT1 0x0001 #define MADERA_IM_SPK_SHUTDOWN_EINT1_MASK 0x0001 #define MADERA_IM_SPK_SHUTDOWN_EINT1_SHIFT 0 -#define MADERA_IM_SPK_SHUTDOWN_EINT1_WIDTH 1 /* (0x1880) - IRQ1 Raw Status 1 */ #define MADERA_CTRLIF_ERR_STS1 0x1000 #define MADERA_CTRLIF_ERR_STS1_MASK 0x1000 #define MADERA_CTRLIF_ERR_STS1_SHIFT 12 -#define MADERA_CTRLIF_ERR_STS1_WIDTH 1 #define MADERA_SYSCLK_FAIL_STS1 0x0200 #define MADERA_SYSCLK_FAIL_STS1_MASK 0x0200 #define MADERA_SYSCLK_FAIL_STS1_SHIFT 9 -#define MADERA_SYSCLK_FAIL_STS1_WIDTH 1 #define MADERA_CLOCK_DETECT_STS1 0x0100 #define MADERA_CLOCK_DETECT_STS1_MASK 0x0100 #define MADERA_CLOCK_DETECT_STS1_SHIFT 8 -#define MADERA_CLOCK_DETECT_STS1_WIDTH 1 #define MADERA_BOOT_DONE_STS1 0x0080 #define MADERA_BOOT_DONE_STS1_MASK 0x0080 #define MADERA_BOOT_DONE_STS1_SHIFT 7 -#define MADERA_BOOT_DONE_STS1_WIDTH 1 /* (0x1881) - IRQ1 Raw Status 2 */ #define MADERA_FLL3_LOCK_STS1 0x0400 #define MADERA_FLL3_LOCK_STS1_MASK 0x0400 #define MADERA_FLL3_LOCK_STS1_SHIFT 10 -#define MADERA_FLL3_LOCK_STS1_WIDTH 1 #define MADERA_FLL2_LOCK_STS1 0x0200 #define MADERA_FLL2_LOCK_STS1_MASK 0x0200 #define MADERA_FLL2_LOCK_STS1_SHIFT 9 -#define MADERA_FLL2_LOCK_STS1_WIDTH 1 #define MADERA_FLL1_LOCK_STS1 0x0100 #define MADERA_FLL1_LOCK_STS1_MASK 0x0100 #define MADERA_FLL1_LOCK_STS1_SHIFT 8 -#define MADERA_FLL1_LOCK_STS1_WIDTH 1 /* (0x1886) - IRQ1 Raw Status 7 */ #define MADERA_MICD_CLAMP_FALL_STS1 0x0020 #define MADERA_MICD_CLAMP_FALL_STS1_MASK 0x0020 #define MADERA_MICD_CLAMP_FALL_STS1_SHIFT 5 -#define MADERA_MICD_CLAMP_FALL_STS1_WIDTH 1 #define MADERA_MICD_CLAMP_RISE_STS1 0x0010 #define MADERA_MICD_CLAMP_RISE_STS1_MASK 0x0010 #define MADERA_MICD_CLAMP_RISE_STS1_SHIFT 4 -#define MADERA_MICD_CLAMP_RISE_STS1_WIDTH 1 #define MADERA_JD2_FALL_STS1 0x0008 #define MADERA_JD2_FALL_STS1_MASK 0x0008 #define MADERA_JD2_FALL_STS1_SHIFT 3 -#define MADERA_JD2_FALL_STS1_WIDTH 1 #define MADERA_JD2_RISE_STS1 0x0004 #define MADERA_JD2_RISE_STS1_MASK 0x0004 #define MADERA_JD2_RISE_STS1_SHIFT 2 -#define MADERA_JD2_RISE_STS1_WIDTH 1 #define MADERA_JD1_FALL_STS1 0x0002 #define MADERA_JD1_FALL_STS1_MASK 0x0002 #define MADERA_JD1_FALL_STS1_SHIFT 1 -#define MADERA_JD1_FALL_STS1_WIDTH 1 #define MADERA_JD1_RISE_STS1 0x0001 #define MADERA_JD1_RISE_STS1_MASK 0x0001 #define MADERA_JD1_RISE_STS1_SHIFT 0 -#define MADERA_JD1_RISE_STS1_WIDTH 1 /* (0x188E) - IRQ1 Raw Status 15 */ #define MADERA_SPK_OVERHEAT_WARN_STS1 0x0004 #define MADERA_SPK_OVERHEAT_WARN_STS1_MASK 0x0004 #define MADERA_SPK_OVERHEAT_WARN_STS1_SHIFT 2 -#define MADERA_SPK_OVERHEAT_WARN_STS1_WIDTH 1 #define MADERA_SPK_OVERHEAT_STS1 0x0002 #define MADERA_SPK_OVERHEAT_STS1_MASK 0x0002 #define MADERA_SPK_OVERHEAT_STS1_SHIFT 1 -#define MADERA_SPK_OVERHEAT_STS1_WIDTH 1 #define MADERA_SPK_SHUTDOWN_STS1 0x0001 #define MADERA_SPK_SHUTDOWN_STS1_MASK 0x0001 #define MADERA_SPK_SHUTDOWN_STS1_SHIFT 0 -#define MADERA_SPK_SHUTDOWN_STS1_WIDTH 1 /* (0x1A06) Interrupt_Debounce_7 */ #define MADERA_MICD_CLAMP_DB 0x0010 #define MADERA_MICD_CLAMP_DB_MASK 0x0010 #define MADERA_MICD_CLAMP_DB_SHIFT 4 -#define MADERA_MICD_CLAMP_DB_WIDTH 1 #define MADERA_JD2_DB 0x0004 #define MADERA_JD2_DB_MASK 0x0004 #define MADERA_JD2_DB_SHIFT 2 -#define MADERA_JD2_DB_WIDTH 1 #define MADERA_JD1_DB 0x0001 #define MADERA_JD1_DB_MASK 0x0001 #define MADERA_JD1_DB_SHIFT 0 -#define MADERA_JD1_DB_WIDTH 1 /* (0x1A0E) Interrupt_Debounce_15 */ #define MADERA_SPK_OVERHEAT_WARN_DB 0x0004 #define MADERA_SPK_OVERHEAT_WARN_DB_MASK 0x0004 #define MADERA_SPK_OVERHEAT_WARN_DB_SHIFT 2 -#define MADERA_SPK_OVERHEAT_WARN_DB_WIDTH 1 #define MADERA_SPK_OVERHEAT_DB 0x0002 #define MADERA_SPK_OVERHEAT_DB_MASK 0x0002 #define MADERA_SPK_OVERHEAT_DB_SHIFT 1 -#define MADERA_SPK_OVERHEAT_DB_WIDTH 1 /* (0x1A80) IRQ1_CTRL */ #define MADERA_IM_IRQ1 0x0800 #define MADERA_IM_IRQ1_MASK 0x0800 #define MADERA_IM_IRQ1_SHIFT 11 -#define MADERA_IM_IRQ1_WIDTH 1 #define MADERA_IRQ_POL 0x0400 #define MADERA_IRQ_POL_MASK 0x0400 #define MADERA_IRQ_POL_SHIFT 10 -#define MADERA_IRQ_POL_WIDTH 1 /* (0x20004) OTP_HPDET_Cal_1 */ #define MADERA_OTP_HPDET_CALIB_OFFSET_11 0xFF000000 #define MADERA_OTP_HPDET_CALIB_OFFSET_11_MASK 0xFF000000 #define MADERA_OTP_HPDET_CALIB_OFFSET_11_SHIFT 24 -#define MADERA_OTP_HPDET_CALIB_OFFSET_11_WIDTH 8 #define MADERA_OTP_HPDET_CALIB_OFFSET_10 0x00FF0000 #define MADERA_OTP_HPDET_CALIB_OFFSET_10_MASK 0x00FF0000 #define MADERA_OTP_HPDET_CALIB_OFFSET_10_SHIFT 16 -#define MADERA_OTP_HPDET_CALIB_OFFSET_10_WIDTH 8 #define MADERA_OTP_HPDET_CALIB_OFFSET_01 0x0000FF00 #define MADERA_OTP_HPDET_CALIB_OFFSET_01_MASK 0x0000FF00 #define MADERA_OTP_HPDET_CALIB_OFFSET_01_SHIFT 8 -#define MADERA_OTP_HPDET_CALIB_OFFSET_01_WIDTH 8 #define MADERA_OTP_HPDET_CALIB_OFFSET_00 0x000000FF #define MADERA_OTP_HPDET_CALIB_OFFSET_00_MASK 0x000000FF #define MADERA_OTP_HPDET_CALIB_OFFSET_00_SHIFT 0 -#define MADERA_OTP_HPDET_CALIB_OFFSET_00_WIDTH 8 /* (0x20006) OTP_HPDET_Cal_2 */ #define MADERA_OTP_HPDET_GRADIENT_1X 0x0000FF00 #define MADERA_OTP_HPDET_GRADIENT_1X_MASK 0x0000FF00 #define MADERA_OTP_HPDET_GRADIENT_1X_SHIFT 8 -#define MADERA_OTP_HPDET_GRADIENT_1X_WIDTH 8 #define MADERA_OTP_HPDET_GRADIENT_0X 0x000000FF #define MADERA_OTP_HPDET_GRADIENT_0X_MASK 0x000000FF #define MADERA_OTP_HPDET_GRADIENT_0X_SHIFT 0 -#define MADERA_OTP_HPDET_GRADIENT_0X_WIDTH 8 #endif -- cgit v1.2.3 From 28faad777c2d1480dfdda6697e58c06cf9011ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 27 Sep 2020 01:59:17 +0200 Subject: mfd: tps65910: Clean up after switching to regmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove wrappers around regmap calls to remove now-useless indirection. Signed-off-by: Michał Mirosław Signed-off-by: Lee Jones --- include/linux/mfd/tps65910.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index ce4b9e743f7c..f7398d982f23 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -913,39 +913,4 @@ static inline int tps65910_chip_id(struct tps65910 *tps65910) return tps65910->id; } -static inline int tps65910_reg_read(struct tps65910 *tps65910, u8 reg, - unsigned int *val) -{ - return regmap_read(tps65910->regmap, reg, val); -} - -static inline int tps65910_reg_write(struct tps65910 *tps65910, u8 reg, - unsigned int val) -{ - return regmap_write(tps65910->regmap, reg, val); -} - -static inline int tps65910_reg_set_bits(struct tps65910 *tps65910, u8 reg, - u8 mask) -{ - return regmap_update_bits(tps65910->regmap, reg, mask, mask); -} - -static inline int tps65910_reg_clear_bits(struct tps65910 *tps65910, u8 reg, - u8 mask) -{ - return regmap_update_bits(tps65910->regmap, reg, mask, 0); -} - -static inline int tps65910_reg_update_bits(struct tps65910 *tps65910, u8 reg, - u8 mask, u8 val) -{ - return regmap_update_bits(tps65910->regmap, reg, mask, val); -} - -static inline int tps65910_irq_get_virq(struct tps65910 *tps65910, int irq) -{ - return regmap_irq_get_virq(tps65910->irq_data, irq); -} - #endif /* __LINUX_MFD_TPS65910_H */ -- cgit v1.2.3 From 9f5b98f3f4149a10c315ddb4d0bed033f398e8ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 27 Sep 2020 01:59:18 +0200 Subject: mfd: tps65910: Remove unused pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Client pointers in tps65910 data are not used in the drivers. Remove those fields. Signed-off-by: Michał Mirosław Signed-off-by: Lee Jones --- include/linux/mfd/tps65910.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index f7398d982f23..701925db75b3 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -890,11 +890,6 @@ struct tps65910 { struct regmap *regmap; unsigned long id; - /* Client devices */ - struct tps65910_pmic *pmic; - struct tps65910_rtc *rtc; - struct tps65910_power *power; - /* Device node parsed board data */ struct tps65910_board *of_plat_data; -- cgit v1.2.3 From f594d01bb4aff35dc582f5418e6823f79e28834b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 27 Oct 2020 09:41:32 +0000 Subject: mfd: madera: Add special errata reset handling for cs47l15 An errata exists for cs47l15 where the reset must be handled differently and removed before DCVDD is applied. A soft reset is used for situations where a reset is required to reset state. This does however, make this part unsuitable for DCVDD supplies with a rise time greater than 2mS. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/madera/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/madera/core.h b/include/linux/mfd/madera/core.h index ad2c138105d4..03a8a788424a 100644 --- a/include/linux/mfd/madera/core.h +++ b/include/linux/mfd/madera/core.h @@ -186,6 +186,7 @@ struct madera { struct regulator_bulk_data core_supplies[MADERA_MAX_CORE_SUPPLIES]; struct regulator *dcvdd; bool internal_dcvdd; + bool reset_errata; struct madera_pdata pdata; -- cgit v1.2.3 From 4556fe8f16e0225f5df7a57d123e0d55717bf2aa Mon Sep 17 00:00:00 2001 From: Michael Srba Date: Tue, 10 Nov 2020 14:00:47 +0100 Subject: mfd: rt5033: Fix errorneous defines Fix regulators on rt5033 by converting some values to bitmasks which were errorneously not defined as such in the header file. Cc: Beomho Seo Fixes: 0b271258544b ("mfd: rt5033: Add Richtek RT5033 driver core.") Signed-off-by: Michael Srba Signed-off-by: Lee Jones --- include/linux/mfd/rt5033-private.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rt5033-private.h b/include/linux/mfd/rt5033-private.h index f812105c538c..2d1895c3efbf 100644 --- a/include/linux/mfd/rt5033-private.h +++ b/include/linux/mfd/rt5033-private.h @@ -91,14 +91,14 @@ enum rt5033_reg { #define RT5033_RT_HZ_MASK 0x01 /* RT5033 control register */ -#define RT5033_CTRL_FCCM_BUCK_MASK 0x00 -#define RT5033_CTRL_BUCKOMS_MASK 0x01 -#define RT5033_CTRL_LDOOMS_MASK 0x02 -#define RT5033_CTRL_SLDOOMS_MASK 0x03 -#define RT5033_CTRL_EN_BUCK_MASK 0x04 -#define RT5033_CTRL_EN_LDO_MASK 0x05 -#define RT5033_CTRL_EN_SAFE_LDO_MASK 0x06 -#define RT5033_CTRL_LDO_SLEEP_MASK 0x07 +#define RT5033_CTRL_FCCM_BUCK_MASK BIT(0) +#define RT5033_CTRL_BUCKOMS_MASK BIT(1) +#define RT5033_CTRL_LDOOMS_MASK BIT(2) +#define RT5033_CTRL_SLDOOMS_MASK BIT(3) +#define RT5033_CTRL_EN_BUCK_MASK BIT(4) +#define RT5033_CTRL_EN_LDO_MASK BIT(5) +#define RT5033_CTRL_EN_SAFE_LDO_MASK BIT(6) +#define RT5033_CTRL_LDO_SLEEP_MASK BIT(7) /* RT5033 BUCK control register */ #define RT5033_BUCK_CTRL_MASK 0x1f @@ -247,11 +247,11 @@ enum rt5033_fuel_reg { #define RT5033_FUEL_BAT_PRESENT 0x02 /* RT5033 PMIC interrupts */ -#define RT5033_PMIC_IRQ_BUCKOCP 2 -#define RT5033_PMIC_IRQ_BUCKLV 3 -#define RT5033_PMIC_IRQ_SAFELDOLV 4 -#define RT5033_PMIC_IRQ_LDOLV 5 -#define RT5033_PMIC_IRQ_OT 6 -#define RT5033_PMIC_IRQ_VDDA_UV 7 +#define RT5033_PMIC_IRQ_BUCKOCP BIT(2) +#define RT5033_PMIC_IRQ_BUCKLV BIT(3) +#define RT5033_PMIC_IRQ_SAFELDOLV BIT(4) +#define RT5033_PMIC_IRQ_LDOLV BIT(5) +#define RT5033_PMIC_IRQ_OT BIT(6) +#define RT5033_PMIC_IRQ_VDDA_UV BIT(7) #endif /* __RT5033_PRIVATE_H__ */ -- cgit v1.2.3 From 295992fb815e791d14b18ef7cdbbaf1a76211a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 14 Sep 2020 15:09:33 +0200 Subject: mm: introduce vma_set_file function v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the new vma_set_file() function to allow changing vma->vm_file with the necessary refcount dance. v2: add more users of this. v3: add missing EXPORT_SYMBOL, rebase on mmap cleanup, add comments why we drop the reference on two occasions. v4: make it clear that changing an anonymous vma is illegal. v5: move vma_set_file to mm/util.c Signed-off-by: Christian König Reviewed-by: Daniel Vetter (v2) Reviewed-by: Jason Gunthorpe Acked-by: Andrew Morton Link: https://patchwork.freedesktop.org/patch/399360/ --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..47bff16c182d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2719,6 +2719,8 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) } #endif +void vma_set_file(struct vm_area_struct *vma, struct file *file); + #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From 179a9cf79212bb3b96fb69a314583189cd863c5b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 17 Nov 2020 16:16:34 +0100 Subject: context_tracking: Don't implement exception_enter/exit() on CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK The typical steps with context tracking are: 1) Task runs in userspace 2) Task enters the kernel (syscall/exception/IRQ) 3) Task switches from context tracking state CONTEXT_USER to CONTEXT_KERNEL (user_exit()) 4) Task does stuff in kernel 5) Task switches from context tracking state CONTEXT_KERNEL to CONTEXT_USER (user_enter()) 6) Task exits the kernel If an exception fires between 5) and 6), the pt_regs and the context tracking disagree on the context of the faulted/trapped instruction. CONTEXT_KERNEL must be set before the exception handler, that's unconditional for those handlers that want to be able to call into schedule(), but CONTEXT_USER must be restored when the exception exits whereas pt_regs tells that we are resuming to kernel space. This can't be fixed with storing the context tracking state in a per-cpu or per-task variable since another exception may fire onto the current one and overwrite the saved state. Also the task can schedule. So it has to be stored in a per task stack. This is how exception_enter()/exception_exit() paper over the problem: 5) Task switches from context tracking state CONTEXT_KERNEL to CONTEXT_USER (user_enter()) 5.1) Exception fires 5.2) prev_state = exception_enter() // save CONTEXT_USER to prev_state // and set CONTEXT_KERNEL 5.3) Exception handler 5.4) exception_enter(prev_state) // restore CONTEXT_USER 5.5) Exception resumes 6) Task exits the kernel The condition to live without exception_enter()/exception_exit() is to forbid exceptions and IRQs between 2) and 3) and between 5) and 6), or if any is allowed to trigger, it won't call into context tracking, eg: NMIs, and it won't schedule. These requirements are met by architectures supporting CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK and those can therefore afford not to implement this hack. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201117151637.259084-3-frederic@kernel.org --- include/linux/context_tracking.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d53cd331c4dd..bceb06498521 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -51,7 +51,8 @@ static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; - if (!context_tracking_enabled()) + if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) || + !context_tracking_enabled()) return 0; prev_ctx = this_cpu_read(context_tracking.state); @@ -63,7 +64,8 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { - if (context_tracking_enabled()) { + if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) && + context_tracking_enabled()) { if (prev_ctx != CONTEXT_KERNEL) context_tracking_enter(prev_ctx); } -- cgit v1.2.3 From 31f6a8c0a471be7d7d05c93eac50fcb729e79b9d Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 27 Oct 2020 18:07:11 +0000 Subject: sched/topology,schedutil: Wrap sched domains rebuild Add the rebuild_sched_domains_energy() function to wrap the functionality that rebuilds the scheduling domains if any of the Energy Aware Scheduling (EAS) initialisation conditions change. This functionality is used when schedutil is added or removed or when EAS is enabled or disabled through the sched_energy_aware sysctl. Therefore, create a single function that is used in both these cases and that can be later reused. Signed-off-by: Ionela Voinescu Signed-off-by: Peter Zijlstra (Intel) Acked-by: Quentin Perret Acked-by: Rafael J. Wysocki Link: https://lkml.kernel.org/r/20201027180713.7642-2-ionela.voinescu@arm.com --- include/linux/sched/topology.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 9ef7bf686a9f..8f0f778b7c91 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -225,6 +225,14 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) #endif /* !CONFIG_SMP */ +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +extern void rebuild_sched_domains_energy(void); +#else +static inline void rebuild_sched_domains_energy(void) +{ +} +#endif + #ifndef arch_scale_cpu_capacity /** * arch_scale_cpu_capacity - get the capacity scale factor of a given CPU. -- cgit v1.2.3 From 25ece30561d247b2931b0d11d92e9c976a668771 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 9 Nov 2020 17:34:05 +0100 Subject: rtc: nvmem: remove nvram ABI The nvram sysfs attributes have been deprecated at least since v4.13, more than 3 years ago and nobody ever complained about the deprecation warning. Remove the sysfs attributes now. [Bartosz: remove the declaration of rtc_nvmem_unregister()] Signed-off-by: Alexandre Belloni Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20201109163409.24301-5-brgl@bgdev.pl --- include/linux/rtc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 22d1575e4991..0983ab9faffb 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -120,10 +120,6 @@ struct rtc_device { bool registered; - /* Old ABI support */ - bool nvram_old_abi; - struct bin_attribute *nvram; - time64_t range_min; timeu64_t range_max; time64_t start_secs; @@ -250,14 +246,12 @@ extern int rtc_hctosys_ret; #ifdef CONFIG_RTC_NVMEM int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config); -void rtc_nvmem_unregister(struct rtc_device *rtc); #else static inline int rtc_nvmem_register(struct rtc_device *rtc, struct nvmem_config *nvmem_config) { return 0; } -static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {} #endif #ifdef CONFIG_RTC_INTF_SYSFS -- cgit v1.2.3 From 3a905c2d9544a418953d6c18668f0f853fbd9be9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:06 +0100 Subject: rtc: add devm_ prefix to rtc_nvmem_register() rtc_nvmem_register() is a managed interface. It doesn't require any release function to be called at driver detach. To avoid confusing driver authors, let's rename it to devm_rtc_nvmem_register() and add it to the list of managed interfaces in Documentation/. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-6-brgl@bgdev.pl --- include/linux/rtc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 0983ab9faffb..cbca651d8ca4 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -244,11 +244,11 @@ extern int rtc_hctosys_ret; #endif #ifdef CONFIG_RTC_NVMEM -int rtc_nvmem_register(struct rtc_device *rtc, - struct nvmem_config *nvmem_config); +int devm_rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config); #else -static inline int rtc_nvmem_register(struct rtc_device *rtc, - struct nvmem_config *nvmem_config) +static inline int devm_rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config) { return 0; } -- cgit v1.2.3 From fdcfd854333be5b30377dc5daa9cd0fa1643a979 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 9 Nov 2020 17:34:08 +0100 Subject: rtc: rework rtc_register_device() resource management rtc_register_device() is a managed interface but it doesn't use devres by itself - instead it marks an rtc_device as "registered" and the devres callback for devm_rtc_allocate_device() takes care of resource release. This doesn't correspond with the design behind devres where managed structures should not be aware of being managed. The correct solution here is to register a separate devres callback for unregistering the device. While at it: rename rtc_register_device() to devm_rtc_register_device() and add it to the list of managed interfaces in devres.rst. This way we can avoid any potential confusion of driver developers who may expect there to exist a corresponding unregister function. Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201109163409.24301-8-brgl@bgdev.pl --- include/linux/rtc.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index cbca651d8ca4..55e7beed066c 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -118,8 +118,6 @@ struct rtc_device { */ long set_offset_nsec; - bool registered; - time64_t range_min; timeu64_t range_max; time64_t start_secs; @@ -157,7 +155,7 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev, const struct rtc_class_ops *ops, struct module *owner); struct rtc_device *devm_rtc_allocate_device(struct device *dev); -int __rtc_register_device(struct module *owner, struct rtc_device *rtc); +int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); @@ -234,8 +232,8 @@ static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec, return false; } -#define rtc_register_device(device) \ - __rtc_register_device(THIS_MODULE, device) +#define devm_rtc_register_device(device) \ + __devm_rtc_register_device(THIS_MODULE, device) #ifdef CONFIG_RTC_HCTOSYS_DEVICE extern int rtc_hctosys_ret; -- cgit v1.2.3 From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Nov 2020 14:53:40 +0900 Subject: tools/bootconfig: Align the bootconfig applied initrd image size to 4 Align the bootconfig applied initrd image size to 4. To fill the gap, the bootconfig command uses null characters in between the bootconfig data and the footer. This will expands the footer size but don't change the checksum. Thus the block image of the initrd file with bootconfig is as follows. [initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"] Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 9903088891fa..2696eb0fc149 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -12,6 +12,9 @@ #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 +#define BOOTCONFIG_ALIGN_SHIFT 2 +#define BOOTCONFIG_ALIGN (1 << BOOTCONFIG_ALIGN_SHIFT) +#define BOOTCONFIG_ALIGN_MASK (BOOTCONFIG_ALIGN - 1) /* XBC tree node */ struct xbc_node { -- cgit v1.2.3 From bc1c99a5971aa7571e8b9731c28fa32abe12cab8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 17 Nov 2020 20:49:52 +0800 Subject: EDAC: Add DDR5 new memory type Add a new entry to 'enum mem_type' and a new string to 'edac_mem_types[]' for DDR5 new memory type. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck --- include/linux/edac.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 8f63245f7f7c..e64b73b556eb 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -181,6 +181,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_LPDDR4: Low-Power DDR4 memory. + * @MEM_DDR5: Unbuffered DDR5 RAM * @MEM_NVDIMM: Non-volatile RAM * @MEM_WIO2: Wide I/O 2. */ @@ -208,6 +209,7 @@ enum mem_type { MEM_RDDR4, MEM_LRDDR4, MEM_LPDDR4, + MEM_DDR5, MEM_NVDIMM, MEM_WIO2, }; @@ -234,6 +236,7 @@ enum mem_type { #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) #define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) -- cgit v1.2.3 From dfe564045c653d9e6969ccca57a8a04771d333f7 Mon Sep 17 00:00:00 2001 From: chao Date: Sun, 30 Aug 2020 23:41:17 -0700 Subject: rcu: Panic after fixed number of stalls Some stalls are transient, so that system fully recovers. This commit therefore allows users to configure the number of stalls that must happen in order to trigger kernel panic. Signed-off-by: chao Signed-off-by: Paul E. McKenney --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f05e9128201..4b5fd3da5fe8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -536,6 +536,7 @@ extern int panic_on_warn; extern unsigned long panic_on_taint; extern bool panic_on_taint_nousertaint; extern int sysctl_panic_on_rcu_stall; +extern int sysctl_max_rcu_stall_to_panic; extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; -- cgit v1.2.3 From 1eafe075bf9cb4db575be4ddf1b1c8256758714a Mon Sep 17 00:00:00 2001 From: Asif Rasheed Date: Sun, 20 Sep 2020 17:31:54 +0400 Subject: list.h: Update comment to explicitly note circular lists The students in the Operating System Lecture Section at the American University of Sharjah were confused by the header comment in include/linux/list.h, which says "Simple doubly linked list implementation". This comment means "simple" as in "not complex", but "simple" is often used in this context to mean "not circular". This commit therefore avoids this ambiguity by explicitly calling out "circular". Signed-off-by: Asif Rasheed Signed-off-by: Paul E. McKenney --- include/linux/list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index a18c87b63376..89bdc92e75c3 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -9,7 +9,7 @@ #include /* - * Simple doubly linked list implementation. + * Circular doubly linked list implementation. * * Some of the internal functions ("__xxx") are useful when * manipulating whole lists rather than single entries, as -- cgit v1.2.3 From 2bf31d94423c8ae3ff58e38a115b177df6940399 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:08 +0100 Subject: jbd2: fix kernel-doc markups Kernel-doc markup should use this format: identifier - description They should not have any type before that, as otherwise the parser won't do the right thing. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Reviewed-by: Jan Kara Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/72f5c6628f5f278d67625f60893ffbc2ca28d46e.1605521731.git.mchehab+huawei@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1c49fd62ff2e..578ff196b3ce 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -401,7 +401,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * struct jbd2_inode - The jbd_inode type is the structure linking inodes in * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { -- cgit v1.2.3 From a24d22b225ce158651378869a6b88105c4bdb887 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 12 Nov 2020 21:20:21 -0800 Subject: crypto: sha - split sha.h into sha1.h and sha2.h Currently contains declarations for both SHA-1 and SHA-2, and contains declarations for SHA-3. This organization is inconsistent, but more importantly SHA-1 is no longer considered to be cryptographically secure. So to the extent possible, SHA-1 shouldn't be grouped together with any of the other SHA versions, and usage of it should be phased out. Therefore, split into two headers and , and make everyone explicitly specify whether they want the declarations for SHA-1, SHA-2, or both. This avoids making the SHA-1 declarations visible to files that don't want anything to do with SHA-1. It also prepares for potentially moving sha1.h into a new insecure/ or dangerous/ directory. Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Acked-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- include/linux/ccp.h | 3 ++- include/linux/filter.h | 2 +- include/linux/purgatory.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index a5dfbaf2470d..868924dec5a1 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -15,7 +15,8 @@ #include #include #include -#include +#include +#include struct ccp_device; struct ccp_cmd; diff --git a/include/linux/filter.h b/include/linux/filter.h index 72d62cbc1578..6c00140538b9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h index b950e961cfa8..d7dc1559427f 100644 --- a/include/linux/purgatory.h +++ b/include/linux/purgatory.h @@ -3,7 +3,7 @@ #define _LINUX_PURGATORY_H #include -#include +#include #include struct kexec_sha_region { -- cgit v1.2.3 From ded5ed04d85e299770dcb7e82c2127b8054a00c8 Mon Sep 17 00:00:00 2001 From: Souradeep Chowdhury Date: Wed, 30 Sep 2020 13:44:13 +0530 Subject: soc: qcom: llcc: Add configuration data for SM8150 Add LLCC configuration data for SM8150 SoC which controls LLCC behaviour. Signed-off-by: Souradeep Chowdhury Link: https://lore.kernel.org/r/957e3ae50c75720ef6227529d5ce3d4b457802e9.1601452132.git.schowdhu@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 90b864655822..3db6797ba6ff 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -16,6 +16,7 @@ #define LLCC_AUDIO 6 #define LLCC_MDMHPGRW 7 #define LLCC_MDM 8 +#define LLCC_MODHW 9 #define LLCC_CMPT 10 #define LLCC_GPUHTW 11 #define LLCC_GPU 12 @@ -26,6 +27,11 @@ #define LLCC_MDMHPFX 20 #define LLCC_MDMPNG 21 #define LLCC_AUDHW 22 +#define LLCC_NPU 23 +#define LLCC_WLHW 24 +#define LLCC_MODPE 29 +#define LLCC_APTCM 30 +#define LLCC_WRCACHE 31 /** * llcc_slice_desc - Cache slice descriptor -- cgit v1.2.3 From 69d98969a0540039fc04e0f22bbe9f41b0a13d66 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 11:18:46 +0100 Subject: can: rename get_can_dlc() macro with can_cc_dlc2len() The get_can_dlc() macro is used to ensure the payload length information of the Classical CAN frame to be max 8 bytes (the CAN_MAX_DLEN). Rename the macro and use the correct constant in preparation of the len/dlc cleanup for Classical CAN frames. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110101852.1973-3-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 41ff31795320..9bc84c6978ec 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -98,14 +98,14 @@ static inline unsigned int can_bit_time(const struct can_bittiming *bt) } /* - * get_can_dlc(value) - helper macro to cast a given data length code (dlc) - * to u8 and ensure the dlc value to be max. 8 bytes. + * can_cc_dlc2len(value) - convert a given data length code (dlc) of a + * Classical CAN frame into a valid data length of max. 8 bytes. * * To be used in the CAN netdriver receive path to ensure conformance with * ISO 11898-1 Chapter 8.4.2.3 (DLC field) */ -#define get_can_dlc(i) (min_t(u8, (i), CAN_MAX_DLC)) -#define get_canfd_dlc(i) (min_t(u8, (i), CANFD_MAX_DLC)) +#define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) +#define get_canfd_dlc(dlc) (min_t(u8, (dlc), CANFD_MAX_DLC)) /* Check for outgoing skbs that have not been created by the CAN subsystem */ static inline bool can_skb_headroom_valid(struct net_device *dev, -- cgit v1.2.3 From cd1124e76d740327be5d8f9ce3785ce1119daf4b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 11:18:47 +0100 Subject: can: remove obsolete get_canfd_dlc() macro The macro was always used together with can_dlc2len() which sanitizes the given dlc value on its own. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110101852.1973-4-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 1 - include/linux/can/dev/peak_canfd.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 9bc84c6978ec..802606e36b58 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -105,7 +105,6 @@ static inline unsigned int can_bit_time(const struct can_bittiming *bt) * ISO 11898-1 Chapter 8.4.2.3 (DLC field) */ #define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) -#define get_canfd_dlc(dlc) (min_t(u8, (dlc), CANFD_MAX_DLC)) /* Check for outgoing skbs that have not been created by the CAN subsystem */ static inline bool can_skb_headroom_valid(struct net_device *dev, diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h index 5fd627e9da19..f38772fd0c07 100644 --- a/include/linux/can/dev/peak_canfd.h +++ b/include/linux/can/dev/peak_canfd.h @@ -282,7 +282,7 @@ static inline int pucan_msg_get_channel(const struct pucan_rx_msg *msg) } /* return the dlc value from any received message channel_dlc field */ -static inline int pucan_msg_get_dlc(const struct pucan_rx_msg *msg) +static inline u8 pucan_msg_get_dlc(const struct pucan_rx_msg *msg) { return msg->channel_dlc >> 4; } -- cgit v1.2.3 From 964db79d6c186cc2ecc6ae46f98eed7e0ea8cf71 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 19 Nov 2020 18:53:55 +0100 Subject: of/address: Introduce of_dma_get_max_cpu_address() Introduce of_dma_get_max_cpu_address(), which provides the highest CPU physical address addressable by all DMA masters in the system. It's specially useful for setting memory zones sizes at early boot time. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201119175400.9995-4-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- include/linux/of.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 5d51891cbf1a..9ed5b8532c30 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); +phys_addr_t of_dma_get_max_cpu_address(struct device_node *np); + #else /* CONFIG_OF */ static inline void of_core_init(void) @@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id, return -EINVAL; } +static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np) +{ + return PHYS_ADDR_MAX; +} + #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ -- cgit v1.2.3 From 2b8652936f0ca9ca2e6c984ae76c7bfcda1b3f22 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 19 Nov 2020 18:53:58 +0100 Subject: arm64: mm: Set ZONE_DMA size based on early IORT scan We recently introduced a 1 GB sized ZONE_DMA to cater for platforms incorporating masters that can address less than 32 bits of DMA, in particular the Raspberry Pi 4, which has 4 or 8 GB of DRAM, but has peripherals that can only address up to 1 GB (and its PCIe host bridge can only access the bottom 3 GB) Instructing the DMA layer about these limitations is straight-forward, even though we had to fix some issues regarding memory limits set in the IORT for named components, and regarding the handling of ACPI _DMA methods. However, the DMA layer also needs to be able to allocate memory that is guaranteed to meet those DMA constraints, for bounce buffering as well as allocating the backing for consistent mappings. This is why the 1 GB ZONE_DMA was introduced recently. Unfortunately, it turns out the having a 1 GB ZONE_DMA as well as a ZONE_DMA32 causes problems with kdump, and potentially in other places where allocations cannot cross zone boundaries. Therefore, we should avoid having two separate DMA zones when possible. So let's do an early scan of the IORT, and only create the ZONE_DMA if we encounter any devices that need it. This puts the burden on the firmware to describe such limitations in the IORT, which may be redundant (and less precise) if _DMA methods are also being provided. However, it should be noted that this situation is highly unusual for arm64 ACPI machines. Also, the DMA subsystem still gives precedence to the _DMA method if implemented, and so we will not lose the ability to perform streaming DMA outside the ZONE_DMA if the _DMA method permits it. [nsaenz: unified implementation with DT's counterpart] Signed-off-by: Ard Biesheuvel Signed-off-by: Nicolas Saenz Julienne Tested-by: Jeremy Linton Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Cc: Jeremy Linton Cc: Lorenzo Pieralisi Cc: Nicolas Saenz Julienne Cc: Rob Herring Cc: Christoph Hellwig Cc: Robin Murphy Cc: Hanjun Guo Cc: Sudeep Holla Cc: Anshuman Khandual Link: https://lore.kernel.org/r/20201119175400.9995-7-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- include/linux/acpi_iort.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 20a32120bb88..1a12baa58e40 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); const struct iommu_ops *iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); +phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline void acpi_iort_init(void) { } static inline u32 iort_msi_map_id(struct device *dev, u32 id) @@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id( static inline int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } + +static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void) +{ return PHYS_ADDR_MAX; } #endif #endif /* __ACPI_IORT_H__ */ -- cgit v1.2.3 From 04435217f96869ac3a8f055ff68c5237a60bcd7e Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 19 Nov 2020 18:53:59 +0100 Subject: mm: Remove examples from enum zone_type comment We can't really list every setup in common code. On top of that they are unlikely to stay true for long as things change in the arch trees independently of this comment. Suggested-by: Christoph Hellwig Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20201119175400.9995-8-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- include/linux/mmzone.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fb3bf696c05e..9d0c454d23cd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -354,26 +354,6 @@ enum zone_type { * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit * platforms may need both zones as they support peripherals with * different DMA addressing limitations. - * - * Some examples: - * - * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the - * rest of the lower 4G. - * - * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on - * the specific device. - * - * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the - * lower 4G. - * - * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary - * depending on the specific device. - * - * - s390 uses ZONE_DMA fixed to the lower 2G. - * - * - ia64 and riscv only use ZONE_DMA32. - * - * - parisc uses neither. */ #ifdef CONFIG_ZONE_DMA ZONE_DMA, -- cgit v1.2.3 From 607a4672b458b12674b96724e2f9bd42a5e928c6 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 20 Nov 2020 10:55:17 +0000 Subject: firmware: arm_scmi: Add full list of sensor type enumeration SCMI v2.0 provides a big list of sensor type enumeration from the sensorUnits enumeration table of Distributed Management Task Force(DMTF) specification number DSP 0248 (Platform Level Data Model for Platform Monitoring and Control Specification). It is however not an exact replica of the sensorUnits enumeration table. Let us just update the table as per SCMI v2.0 specification. Link: https://lore.kernel.org/r/20201119174906.43862-3-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 81 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9cd312a1ff92..13d75956aa91 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -163,11 +163,92 @@ struct scmi_sensor_info { */ enum scmi_sensor_class { NONE = 0x0, + UNSPEC = 0x1, TEMPERATURE_C = 0x2, + TEMPERATURE_F = 0x3, + TEMPERATURE_K = 0x4, VOLTAGE = 0x5, CURRENT = 0x6, POWER = 0x7, ENERGY = 0x8, + CHARGE = 0x9, + VOLTAMPERE = 0xA, + NITS = 0xB, + LUMENS = 0xC, + LUX = 0xD, + CANDELAS = 0xE, + KPA = 0xF, + PSI = 0x10, + NEWTON = 0x11, + CFM = 0x12, + RPM = 0x13, + HERTZ = 0x14, + SECS = 0x15, + MINS = 0x16, + HOURS = 0x17, + DAYS = 0x18, + WEEKS = 0x19, + MILS = 0x1A, + INCHES = 0x1B, + FEET = 0x1C, + CUBIC_INCHES = 0x1D, + CUBIC_FEET = 0x1E, + METERS = 0x1F, + CUBIC_CM = 0x20, + CUBIC_METERS = 0x21, + LITERS = 0x22, + FLUID_OUNCES = 0x23, + RADIANS = 0x24, + STERADIANS = 0x25, + REVOLUTIONS = 0x26, + CYCLES = 0x27, + GRAVITIES = 0x28, + OUNCES = 0x29, + POUNDS = 0x2A, + FOOT_POUNDS = 0x2B, + OUNCE_INCHES = 0x2C, + GAUSS = 0x2D, + GILBERTS = 0x2E, + HENRIES = 0x2F, + FARADS = 0x30, + OHMS = 0x31, + SIEMENS = 0x32, + MOLES = 0x33, + BECQUERELS = 0x34, + PPM = 0x35, + DECIBELS = 0x36, + DBA = 0x37, + DBC = 0x38, + GRAYS = 0x39, + SIEVERTS = 0x3A, + COLOR_TEMP_K = 0x3B, + BITS = 0x3C, + BYTES = 0x3D, + WORDS = 0x3E, + DWORDS = 0x3F, + QWORDS = 0x40, + PERCENTAGE = 0x41, + PASCALS = 0x42, + COUNTS = 0x43, + GRAMS = 0x44, + NEWTON_METERS = 0x45, + HITS = 0x46, + MISSES = 0x47, + RETRIES = 0x48, + OVERRUNS = 0x49, + UNDERRUNS = 0x4A, + COLLISIONS = 0x4B, + PACKETS = 0x4C, + MESSAGES = 0x4D, + CHARS = 0x4E, + ERRORS = 0x4F, + CORRECTED_ERRS = 0x50, + UNCORRECTABLE_ERRS = 0x51, + SQ_MILS = 0x52, + SQ_INCHES = 0x53, + SQ_FEET = 0x54, + SQ_CM = 0x55, + SQ_METERS = 0x56, }; /** -- cgit v1.2.3 From 1fe00b8b4276ddf335216f884cb719edbea129e1 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:02 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensors descriptors extensions Add support for new SCMI v3.0 Sensors extensions related to new sensors' features, like multiple axis and update intervals, while keeping compatibility with SCMI v2.0 features. While at that, refactor and simplify all the internal helpers macros and move struct scmi_sensor_info to use only non-fixed-size typing. Link: https://lore.kernel.org/r/20201119174906.43862-3-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 139 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 13d75956aa91..0792b0be25a3 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -8,6 +8,7 @@ #ifndef _LINUX_SCMI_PROTOCOL_H #define _LINUX_SCMI_PROTOCOL_H +#include #include #include #include @@ -148,13 +149,135 @@ struct scmi_power_ops { u32 *state); }; +/** + * scmi_range_attrs - specifies a sensor or axis values' range + * @min_range: The minimum value which can be represented by the sensor/axis. + * @max_range: The maximum value which can be represented by the sensor/axis. + */ +struct scmi_range_attrs { + long long min_range; + long long max_range; +}; + +/** + * scmi_sensor_axis_info - describes one sensor axes + * @id: The axes ID. + * @type: Axes type. Chosen amongst one of @enum scmi_sensor_class. + * @scale: Power-of-10 multiplier applied to the axis unit. + * @name: NULL-terminated string representing axes name as advertised by + * SCMI platform. + * @extended_attrs: Flag to indicate the presence of additional extended + * attributes for this axes. + * @resolution: Extended attribute representing the resolution of the axes. + * Set to 0 if not reported by this axes. + * @exponent: Extended attribute representing the power-of-10 multiplier that + * is applied to the resolution field. Set to 0 if not reported by + * this axes. + * @attrs: Extended attributes representing minimum and maximum values + * measurable by this axes. Set to 0 if not reported by this sensor. + */ +struct scmi_sensor_axis_info { + unsigned int id; + unsigned int type; + int scale; + char name[SCMI_MAX_STR_SIZE]; + bool extended_attrs; + unsigned int resolution; + int exponent; + struct scmi_range_attrs attrs; +}; + +/** + * scmi_sensor_intervals_info - describes number and type of available update + * intervals + * @segmented: Flag for segmented intervals' representation. When True there + * will be exactly 3 intervals in @desc, with each entry + * representing a member of a segment in this order: + * {lowest update interval, highest update interval, step size} + * @count: Number of intervals described in @desc. + * @desc: Array of @count interval descriptor bitmask represented as detailed in + * the SCMI specification: it can be accessed using the accompanying + * macros. + * @prealloc_pool: A minimal preallocated pool of desc entries used to avoid + * lesser-than-64-bytes dynamic allocation for small @count + * values. + */ +struct scmi_sensor_intervals_info { + bool segmented; + unsigned int count; +#define SCMI_SENS_INTVL_SEGMENT_LOW 0 +#define SCMI_SENS_INTVL_SEGMENT_HIGH 1 +#define SCMI_SENS_INTVL_SEGMENT_STEP 2 + unsigned int *desc; +#define SCMI_SENS_INTVL_GET_SECS(x) FIELD_GET(GENMASK(20, 5), (x)) +#define SCMI_SENS_INTVL_GET_EXP(x) \ + ({ \ + int __signed_exp = FIELD_GET(GENMASK(4, 0), (x)); \ + \ + if (__signed_exp & BIT(4)) \ + __signed_exp |= GENMASK(31, 5); \ + __signed_exp; \ + }) +#define SCMI_MAX_PREALLOC_POOL 16 + unsigned int prealloc_pool[SCMI_MAX_PREALLOC_POOL]; +}; + +/** + * struct scmi_sensor_info - represents information related to one of the + * available sensors. + * @id: Sensor ID. + * @type: Sensor type. Chosen amongst one of @enum scmi_sensor_class. + * @scale: Power-of-10 multiplier applied to the sensor unit. + * @num_trip_points: Number of maximum configurable trip points. + * @async: Flag for asynchronous read support. + * @update: Flag for continuouos update notification support. + * @timestamped: Flag for timestamped read support. + * @tstamp_scale: Power-of-10 multiplier applied to the sensor timestamps to + * represent it in seconds. + * @num_axis: Number of supported axis if any. Reported as 0 for scalar sensors. + * @axis: Pointer to an array of @num_axis descriptors. + * @intervals: Descriptor of available update intervals. + * @sensor_config: A bitmask reporting the current sensor configuration as + * detailed in the SCMI specification: it can accessed and + * modified through the accompanying macros. + * @name: NULL-terminated string representing sensor name as advertised by + * SCMI platform. + * @extended_scalar_attrs: Flag to indicate the presence of additional extended + * attributes for this sensor. + * @sensor_power: Extended attribute representing the average power + * consumed by the sensor in microwatts (uW) when it is active. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + * @resolution: Extended attribute representing the resolution of the sensor. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + * @exponent: Extended attribute representing the power-of-10 multiplier that is + * applied to the resolution field. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + * @scalar_attrs: Extended attributes representing minimum and maximum + * measurable values by this sensor. + * Reported here only for scalar sensors. + * Set to 0 if not reported by this sensor. + */ struct scmi_sensor_info { - u32 id; - u8 type; - s8 scale; - u8 num_trip_points; + unsigned int id; + unsigned int type; + int scale; + unsigned int num_trip_points; bool async; + bool update; + bool timestamped; + int tstamp_scale; + unsigned int num_axis; + struct scmi_sensor_axis_info *axis; + struct scmi_sensor_intervals_info intervals; char name[SCMI_MAX_STR_SIZE]; + bool extended_scalar_attrs; + unsigned int sensor_power; + unsigned int resolution; + int exponent; + struct scmi_range_attrs scalar_attrs; }; /* @@ -249,6 +372,14 @@ enum scmi_sensor_class { SQ_FEET = 0x54, SQ_CM = 0x55, SQ_METERS = 0x56, + RADIANS_SEC = 0x57, + BPM = 0x58, + METERS_SEC_SQUARED = 0x59, + METERS_SEC = 0x5A, + CUBIC_METERS_SEC = 0x5B, + MM_MERCURY = 0x5C, + RADIANS_SEC_SQUARED = 0x5D, + OEM_UNIT = 0xFF }; /** -- cgit v1.2.3 From c7b74967799b1af52b3045d69d4c26836b2d41de Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 20 Nov 2020 11:04:44 +0100 Subject: can: replace can_dlc as variable/element for payload length The naming of can_dlc as element of struct can_frame and also as variable name is misleading as it claims to be a 'data length CODE' but in reality it always was a plain data length. With the indroduction of a new 'len' element in struct can_frame we can now remove can_dlc as name and make clear which of the former uses was a plain length (-> 'len') or a data length code (-> 'dlc') value. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201120100444.3199-1-socketcan@hartkopp.net [mkl: gs_usb: keep struct gs_host_frame::can_dlc as is] Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 802606e36b58..77da061c21c9 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -185,8 +185,8 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, dev->mtu = CANFD_MTU; } -/* get data length from can_dlc with sanitized can_dlc */ -u8 can_dlc2len(u8 can_dlc); +/* get data length from raw data length code (DLC) */ +u8 can_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ u8 can_len2dlc(u8 len); -- cgit v1.2.3 From 3ab4ce0d6fa8c93d41df4a74ec8d2c9198be2109 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 11:18:49 +0100 Subject: can: rename CAN FD related can_len2dlc and can_dlc2len helpers The helper functions can_len2dlc and can_dlc2len are only relevant for CAN FD data length code (DLC) conversion. To fit the introduced can_cc_dlc2len for Classical CAN we rename: can_dlc2len -> can_fd_dlc2len to get the payload length from the DLC can_len2dlc -> can_fd_len2dlc to get the DLC from the payload length Suggested-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110101852.1973-6-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 77da061c21c9..e767a96ae075 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -186,10 +186,10 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, } /* get data length from raw data length code (DLC) */ -u8 can_dlc2len(u8 dlc); +u8 can_fd_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ -u8 can_len2dlc(u8 len); +u8 can_fd_len2dlc(u8 len); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs); -- cgit v1.2.3 From e8e73562ce0b24d691ad35df3de34b324248458f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Nov 2020 16:49:12 +0100 Subject: can: drivers: introduce helpers to access Classical CAN DLC values This patch adds the following helper to functions to access Classical CAN DLC values. can_get_cc_dlc(): get the data length code for Classical CAN raw DLC access can_frame_set_cc_len(): set len and len8_dlc value for Classical CAN raw DLC access Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201110154913.1404582-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index e767a96ae075..197a79535cc2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -170,6 +170,31 @@ static inline bool can_is_canfd_skb(const struct sk_buff *skb) return skb->len == CANFD_MTU; } +/* helper to get the data length code (DLC) for Classical CAN raw DLC access */ +static inline u8 can_get_cc_dlc(const struct can_frame *cf, const u32 ctrlmode) +{ + /* return len8_dlc as dlc value only if all conditions apply */ + if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) && + (cf->len == CAN_MAX_DLEN) && + (cf->len8_dlc > CAN_MAX_DLEN && cf->len8_dlc <= CAN_MAX_RAW_DLC)) + return cf->len8_dlc; + + /* return the payload length as dlc value */ + return cf->len; +} + +/* helper to set len and len8_dlc value for Classical CAN raw DLC access */ +static inline void can_frame_set_cc_len(struct can_frame *cf, const u8 dlc, + const u32 ctrlmode) +{ + /* the caller already ensured that dlc is a value from 0 .. 15 */ + if (ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC && dlc > CAN_MAX_DLEN) + cf->len8_dlc = dlc; + + /* limit the payload length 'len' to CAN_MAX_DLEN */ + cf->len = can_cc_dlc2len(dlc); +} + /* helper to define static CAN controller features at device creation time */ static inline void can_set_static_ctrlmode(struct net_device *dev, u32 static_mode) -- cgit v1.2.3 From 757055ae8dedf5333af17b3b5b4b70ba9bc9da4e Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 11 Nov 2020 14:54:49 +0100 Subject: init/console: Use ttynull as a fallback when there is no console stdin, stdout, and stderr standard I/O stream are created for the init process. They are not available when there is no console registered for /dev/console. It might lead to a crash when the init process tries to use them, see the commit 48021f98130880dd742 ("printk: handle blank console arguments passed in."). Normally, ttySX and ttyX consoles are used as a fallback when no consoles are defined via the command line, device tree, or SPCR. But there will be no console registered when an invalid console name is configured or when the configured consoles do not exist on the system. Users even try to avoid the console intentionally, for example, by using console="" or console=null. It is used on production systems where the serial port or terminal are not visible to users. Pushing messages to these consoles would just unnecessary slowdown the system. Make sure that stdin, stdout, stderr, and /dev/console are always available by a fallback to the existing ttynull driver. It has been implemented for exactly this purpose but it was used only when explicitly configured. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20201111135450.11214-2-pmladek@suse.com --- include/linux/console.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 4b1e26c4cb42..9c662e41cde5 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -187,9 +187,12 @@ extern int braille_register_console(struct console *, int index, extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); +extern void register_ttynull_console(void); #else static inline void console_sysfs_notify(void) { } +static inline void register_ttynull_console(void) +{ } #endif extern bool console_suspend_enabled; -- cgit v1.2.3 From 2add5cacff3531e54c50b0832128299faa9f0563 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 19:10:47 +0000 Subject: firmware: arm_scmi: Add voltage domain management protocol support SCMI v3.0 introduces voltage domain protocol which provides commands to: - Discover the voltage levels supported by a domain - Get the configuration and voltage level of a domain - Set the configuration and voltage level of a domain Let us add support for the same. Link: https://lore.kernel.org/r/20201119191051.46363-2-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 64 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9cd312a1ff92..b11ca01e2393 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -209,6 +209,64 @@ struct scmi_reset_ops { int (*deassert)(const struct scmi_handle *handle, u32 domain); }; +/** + * struct scmi_voltage_info - describe one available SCMI Voltage Domain + * + * @id: the domain ID as advertised by the platform + * @segmented: defines the layout of the entries of array @levels_uv. + * - when True the entries are to be interpreted as triplets, + * each defining a segment representing a range of equally + * space voltages: , , + * - when False the entries simply represent a single discrete + * supported voltage level + * @negative_volts_allowed: True if any of the entries of @levels_uv represent + * a negative voltage. + * @attributes: represents Voltage Domain advertised attributes + * @name: name assigned to the Voltage Domain by platform + * @num_levels: number of total entries in @levels_uv. + * @levels_uv: array of entries describing the available voltage levels for + * this domain. + */ +struct scmi_voltage_info { + unsigned int id; + bool segmented; + bool negative_volts_allowed; + unsigned int attributes; + char name[SCMI_MAX_STR_SIZE]; + unsigned int num_levels; +#define SCMI_VOLTAGE_SEGMENT_LOW 0 +#define SCMI_VOLTAGE_SEGMENT_HIGH 1 +#define SCMI_VOLTAGE_SEGMENT_STEP 2 + int *levels_uv; +}; + +/** + * struct scmi_voltage_ops - represents the various operations provided + * by SCMI Voltage Protocol + * + * @num_domains_get: get the count of voltage domains provided by SCMI + * @info_get: get the information of the specified domain + * @config_set: set the config for the specified domain + * @config_get: get the config of the specified domain + * @level_set: set the voltage level for the specified domain + * @level_get: get the voltage level of the specified domain + */ +struct scmi_voltage_ops { + int (*num_domains_get)(const struct scmi_handle *handle); + const struct scmi_voltage_info __must_check *(*info_get) + (const struct scmi_handle *handle, u32 domain_id); + int (*config_set)(const struct scmi_handle *handle, u32 domain_id, + u32 config); +#define SCMI_VOLTAGE_ARCH_STATE_OFF 0x0 +#define SCMI_VOLTAGE_ARCH_STATE_ON 0x7 + int (*config_get)(const struct scmi_handle *handle, u32 domain_id, + u32 *config); + int (*level_set)(const struct scmi_handle *handle, u32 domain_id, + u32 flags, s32 volt_uV); + int (*level_get)(const struct scmi_handle *handle, u32 domain_id, + s32 *volt_uV); +}; + /** * struct scmi_notify_ops - represents notifications' operations provided by * SCMI core @@ -262,6 +320,7 @@ struct scmi_notify_ops { * @clk_ops: pointer to set of clock protocol operations * @sensor_ops: pointer to set of sensor protocol operations * @reset_ops: pointer to set of reset protocol operations + * @voltage_ops: pointer to set of voltage protocol operations * @notify_ops: pointer to set of notifications related operations * @perf_priv: pointer to private data structure specific to performance * protocol(for internal use only) @@ -273,6 +332,8 @@ struct scmi_notify_ops { * protocol(for internal use only) * @reset_priv: pointer to private data structure specific to reset * protocol(for internal use only) + * @voltage_priv: pointer to private data structure specific to voltage + * protocol(for internal use only) * @notify_priv: pointer to private data structure specific to notifications * (for internal use only) */ @@ -284,6 +345,7 @@ struct scmi_handle { const struct scmi_power_ops *power_ops; const struct scmi_sensor_ops *sensor_ops; const struct scmi_reset_ops *reset_ops; + const struct scmi_voltage_ops *voltage_ops; const struct scmi_notify_ops *notify_ops; /* for protocol internal use */ void *perf_priv; @@ -291,6 +353,7 @@ struct scmi_handle { void *power_priv; void *sensor_priv; void *reset_priv; + void *voltage_priv; void *notify_priv; void *system_priv; }; @@ -303,6 +366,7 @@ enum scmi_std_protocol { SCMI_PROTOCOL_CLOCK = 0x14, SCMI_PROTOCOL_SENSOR = 0x15, SCMI_PROTOCOL_RESET = 0x16, + SCMI_PROTOCOL_VOLTAGE = 0x17, }; enum scmi_system_events { -- cgit v1.2.3 From 341917490d7d68d2f7267a265b8820fc3f8ead1b Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Wed, 18 Nov 2020 23:49:20 +0100 Subject: PCI: Decode PCIe 64 GT/s link speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCIe r6.0, sec 7.5.3.18, defines a new 64.0 GT/s bit in the Supported Link Speeds Vector of Link Capabilities 2. This patch does not affect the speed of the link, which should be negotiated automatically by the hardware; it only adds decoding when showing the speed to the user. Decode this new speed. Previously, reading the speed of a link operating at this speed showed "Unknown speed" instead of "64.0 GT/s". Link: https://lore.kernel.org/r/aaaab33fe18975e123a84aebce2adb85f44e2bbe.1605739760.git.gustavo.pimentel@synopsys.com Signed-off-by: Gustavo Pimentel Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..e007bc3e8b6e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -281,6 +281,7 @@ enum pci_bus_speed { PCIE_SPEED_8_0GT = 0x16, PCIE_SPEED_16_0GT = 0x17, PCIE_SPEED_32_0GT = 0x18, + PCIE_SPEED_64_0GT = 0x19, PCI_SPEED_UNKNOWN = 0xff, }; -- cgit v1.2.3 From 956fb852181e4e4b16ccad62511e0038d3ed4928 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Wed, 18 Nov 2020 17:14:14 +0530 Subject: octeontx2-pf: move lmt flush to include/linux/soc On OcteonTX2 platform CPT instruction enqueue and NIX packet send are only possible via LMTST operations which uses LDEOR instruction. This patch moves lmt flush function from OcteonTX2 nic driver to include/linux/soc since it will be used by OcteonTX2 CPT and NIC driver for LMTST. Signed-off-by: Suheil Chandran Signed-off-by: Srujana Challa Signed-off-by: Jakub Kicinski --- include/linux/soc/marvell/octeontx2/asm.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/soc/marvell/octeontx2/asm.h (limited to 'include/linux') diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h new file mode 100644 index 000000000000..ae2279fe830a --- /dev/null +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __SOC_OTX2_ASM_H +#define __SOC_OTX2_ASM_H + +#if defined(CONFIG_ARM64) +/* + * otx2_lmt_flush is used for LMT store operation. + * On octeontx2 platform CPT instruction enqueue and + * NIX packet send are only possible via LMTST + * operations and it uses LDEOR instruction targeting + * the coprocessor address. + */ +#define otx2_lmt_flush(ioaddr) \ +({ \ + u64 result = 0; \ + __asm__ volatile(".cpu generic+lse\n" \ + "ldeor xzr, %x[rf], [%[rs]]" \ + : [rf]"=r" (result) \ + : [rs]"r" (ioaddr)); \ + (result); \ +}) +#else +#define otx2_lmt_flush(ioaddr) ({ 0; }) +#endif + +#endif /* __SOC_OTX2_ASM_H */ -- cgit v1.2.3 From 0d8315dddd2899f519fe1ca3d4d5cdaf44ea421e Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Wed, 11 Nov 2020 07:33:54 -0600 Subject: seccomp/cache: Report cache data through /proc/pid/seccomp_cache Currently the kernel does not provide an infrastructure to translate architecture numbers to a human-readable name. Translating syscall numbers to syscall names is possible through FTRACE_SYSCALL infrastructure but it does not provide support for compat syscalls. This will create a file for each PID as /proc/pid/seccomp_cache. The file will be empty when no seccomp filters are loaded, or be in the format of: where ALLOW means the cache is guaranteed to allow the syscall, and filter means the cache will pass the syscall to the BPF filter. For the docker default profile on x86_64 it looks like: x86_64 0 ALLOW x86_64 1 ALLOW x86_64 2 ALLOW x86_64 3 ALLOW [...] x86_64 132 ALLOW x86_64 133 ALLOW x86_64 134 FILTER x86_64 135 FILTER x86_64 136 FILTER x86_64 137 ALLOW x86_64 138 ALLOW x86_64 139 FILTER x86_64 140 ALLOW x86_64 141 ALLOW [...] This file is guarded by CONFIG_SECCOMP_CACHE_DEBUG with a default of N because I think certain users of seccomp might not want the application to know which syscalls are definitely usable. For the same reason, it is also guarded by CAP_SYS_ADMIN. Suggested-by: Jann Horn Link: https://lore.kernel.org/lkml/CAG48ez3Ofqp4crXGksLmZY6=fGrF_tWyUCg7PBkAetvbbOPeOA@mail.gmail.com/ Signed-off-by: YiFei Zhu Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/94e663fa53136f5a11f432c661794d1ee7060779.1605101222.git.yifeifz2@illinois.edu --- include/linux/seccomp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 02aef2844c38..76963ec4641a 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -121,4 +121,11 @@ static inline long seccomp_get_metadata(struct task_struct *task, return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ + +#ifdef CONFIG_SECCOMP_CACHE_DEBUG +struct seq_file; + +int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +#endif #endif /* _LINUX_SECCOMP_H */ -- cgit v1.2.3 From 4ae21993f07422ec1cb83e9530f87fa61bff02bd Mon Sep 17 00:00:00 2001 From: Antonio Cardace Date: Wed, 18 Nov 2020 21:45:17 +0100 Subject: ethtool: add ETHTOOL_COALESCE_ALL_PARAMS define This bitmask represents all existing coalesce parameters. Signed-off-by: Antonio Cardace Reviewed-by: Michal Kubecek Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6408b446051f..e3da25b51ae4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -215,6 +215,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_USECS_HIGH BIT(19) #define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH BIT(20) #define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(21, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) -- cgit v1.2.3 From 5164c788985702ad94fa4ce6adca29bf280876df Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 29 Sep 2020 15:59:43 +0300 Subject: iio: triggered-buffer: add {devm_}iio_triggered_buffer_setup_ext variants This change adds a parameter to the {devm_}iio_triggered_buffer_setup() functions to assign the extra sysfs buffer attributes that are typically assigned via iio_buffer_set_attrs(). The functions also get renamed to iio_triggered_buffer_setup_ext() & devm_iio_triggered_buffer_setup_ext(). For backwards compatibility the old {devm_}iio_triggered_buffer_setup() functions are now macros wrap the new (renamed) functions with NULL for the buffer attrs. The aim is to remove iio_buffer_set_attrs(), so in the iio_triggered_buffer_setup_ext() function the attributes are assigned directly to 'buffer->attrs'. When adding multiple IIO buffers per IIO device, it can be pretty cumbersome to first allocate a set of buffers, then to dig them out of IIO to assign extra attributes (with iio_buffer_set_attrs()). Naturally, the best way would be to provide them at allocation time, which is what this change does. At this moment, buffers allocated with {devm_}iio_triggered_buffer_setup() are the only ones in mainline IIO to call iio_buffer_set_attrs(). Signed-off-by: Alexandru Ardelean Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200929125949.69934-4-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/triggered_buffer.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h index e99c91799359..7f154d1f8739 100644 --- a/include/linux/iio/triggered_buffer.h +++ b/include/linux/iio/triggered_buffer.h @@ -4,19 +4,28 @@ #include +struct attribute; struct iio_dev; struct iio_buffer_setup_ops; -int iio_triggered_buffer_setup(struct iio_dev *indio_dev, +int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev, irqreturn_t (*h)(int irq, void *p), irqreturn_t (*thread)(int irq, void *p), - const struct iio_buffer_setup_ops *setup_ops); + const struct iio_buffer_setup_ops *setup_ops, + const struct attribute **buffer_attrs); void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev); -int devm_iio_triggered_buffer_setup(struct device *dev, - struct iio_dev *indio_dev, - irqreturn_t (*h)(int irq, void *p), - irqreturn_t (*thread)(int irq, void *p), - const struct iio_buffer_setup_ops *ops); +#define iio_triggered_buffer_setup(indio_dev, h, thread, setup_ops) \ + iio_triggered_buffer_setup_ext((indio_dev), (h), (thread), (setup_ops), NULL) + +int devm_iio_triggered_buffer_setup_ext(struct device *dev, + struct iio_dev *indio_dev, + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p), + const struct iio_buffer_setup_ops *ops, + const struct attribute **buffer_attrs); + +#define devm_iio_triggered_buffer_setup(dev, indio_dev, h, thread, setup_ops) \ + devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread), (setup_ops), NULL) #endif -- cgit v1.2.3 From 21232b4456ba5e1eea7385bd3c4b1994994fd409 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 29 Sep 2020 15:59:49 +0300 Subject: iio: buffer: remove iio_buffer_set_attrs() helper The iio_buffer_set_attrs() is no longer used in the drivers, so it can be removed now. Signed-off-by: Alexandru Ardelean Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200929125949.69934-10-alexandru.ardelean@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index fbba4093f06c..8febc23f5f26 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -11,9 +11,6 @@ struct iio_buffer; -void iio_buffer_set_attrs(struct iio_buffer *buffer, - const struct attribute **attrs); - int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data); /** -- cgit v1.2.3 From e2083d36739168f7b612312160cf7bb45b251408 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:04 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensors timestamped reads Add new .reading_get_timestamped() method to sensor_ops to support SCMI v3.0 timestamped reads. Link: https://lore.kernel.org/r/20201119174906.43862-5-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 0792b0be25a3..0c52bf0cbee4 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -149,6 +149,20 @@ struct scmi_power_ops { u32 *state); }; +/** + * scmi_sensor_reading - represent a timestamped read + * + * Used by @reading_get_timestamped method. + * + * @value: The signed value sensor read. + * @timestamp: An unsigned timestamp for the sensor read, as provided by + * SCMI platform. Set to zero when not available. + */ +struct scmi_sensor_reading { + long long value; + unsigned long long timestamp; +}; + /** * scmi_range_attrs - specifies a sensor or axis values' range * @min_range: The minimum value which can be represented by the sensor/axis. @@ -390,6 +404,11 @@ enum scmi_sensor_class { * @info_get: get the information of the specified sensor * @trip_point_config: selects and configures a trip-point of interest * @reading_get: gets the current value of the sensor + * @reading_get_timestamped: gets the current value and timestamp, when + * available, of the sensor. (as of v3.0 spec) + * Supports multi-axis sensors for sensors which + * supports it and if the @reading array size of + * @count entry equals the sensor num_axis */ struct scmi_sensor_ops { int (*count_get)(const struct scmi_handle *handle); @@ -399,6 +418,9 @@ struct scmi_sensor_ops { u32 sensor_id, u8 trip_id, u64 trip_value); int (*reading_get)(const struct scmi_handle *handle, u32 sensor_id, u64 *value); + int (*reading_get_timestamped)(const struct scmi_handle *handle, + u32 sensor_id, u8 count, + struct scmi_sensor_reading *readings); }; /** -- cgit v1.2.3 From 7b83c5f41088987d04e24c3af0e1fb9f43b747b5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:05 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensor configuration support Add SCMI v3.0 sensor support for CONFIG_GET/CONFIG_SET commands. Link: https://lore.kernel.org/r/20201119174906.43862-6-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 0c52bf0cbee4..7e9e2cd3d46b 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -286,7 +286,38 @@ struct scmi_sensor_info { unsigned int num_axis; struct scmi_sensor_axis_info *axis; struct scmi_sensor_intervals_info intervals; + unsigned int sensor_config; +#define SCMI_SENS_CFG_UPDATE_SECS_MASK GENMASK(31, 16) +#define SCMI_SENS_CFG_GET_UPDATE_SECS(x) \ + FIELD_GET(SCMI_SENS_CFG_UPDATE_SECS_MASK, (x)) + +#define SCMI_SENS_CFG_UPDATE_EXP_MASK GENMASK(15, 11) +#define SCMI_SENS_CFG_GET_UPDATE_EXP(x) \ + ({ \ + int __signed_exp = \ + FIELD_GET(SCMI_SENS_CFG_UPDATE_EXP_MASK, (x)); \ + \ + if (__signed_exp & BIT(4)) \ + __signed_exp |= GENMASK(31, 5); \ + __signed_exp; \ + }) + +#define SCMI_SENS_CFG_ROUND_MASK GENMASK(10, 9) +#define SCMI_SENS_CFG_ROUND_AUTO 2 +#define SCMI_SENS_CFG_ROUND_UP 1 +#define SCMI_SENS_CFG_ROUND_DOWN 0 + +#define SCMI_SENS_CFG_TSTAMP_ENABLED_MASK BIT(1) +#define SCMI_SENS_CFG_TSTAMP_ENABLE 1 +#define SCMI_SENS_CFG_TSTAMP_DISABLE 0 +#define SCMI_SENS_CFG_IS_TSTAMP_ENABLED(x) \ + FIELD_GET(SCMI_SENS_CFG_TSTAMP_ENABLED_MASK, (x)) + +#define SCMI_SENS_CFG_SENSOR_ENABLED_MASK BIT(0) +#define SCMI_SENS_CFG_SENSOR_ENABLE 1 +#define SCMI_SENS_CFG_SENSOR_DISABLE 0 char name[SCMI_MAX_STR_SIZE]; +#define SCMI_SENS_CFG_IS_ENABLED(x) FIELD_GET(BIT(0), (x)) bool extended_scalar_attrs; unsigned int sensor_power; unsigned int resolution; @@ -409,6 +440,8 @@ enum scmi_sensor_class { * Supports multi-axis sensors for sensors which * supports it and if the @reading array size of * @count entry equals the sensor num_axis + * @config_get: Get sensor current configuration + * @config_set: Set sensor current configuration */ struct scmi_sensor_ops { int (*count_get)(const struct scmi_handle *handle); @@ -421,6 +454,10 @@ struct scmi_sensor_ops { int (*reading_get_timestamped)(const struct scmi_handle *handle, u32 sensor_id, u8 count, struct scmi_sensor_reading *readings); + int (*config_get)(const struct scmi_handle *handle, + u32 sensor_id, u32 *sensor_config); + int (*config_set)(const struct scmi_handle *handle, + u32 sensor_id, u32 sensor_config); }; /** -- cgit v1.2.3 From e3811190acf85c63518fbddaa28bcbfab2baa58d Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 17:49:06 +0000 Subject: firmware: arm_scmi: Add SCMI v3.0 sensor notifications Add support for new SCMI v3.0 SENSOR_UPDATE notification. Link: https://lore.kernel.org/r/20201119174906.43862-7-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 7e9e2cd3d46b..be0be5ff7514 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -657,6 +657,7 @@ enum scmi_notification_events { SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED = 0x0, SCMI_EVENT_PERFORMANCE_LEVEL_CHANGED = 0x1, SCMI_EVENT_SENSOR_TRIP_POINT_EVENT = 0x0, + SCMI_EVENT_SENSOR_UPDATE = 0x1, SCMI_EVENT_RESET_ISSUED = 0x0, SCMI_EVENT_BASE_ERROR_EVENT = 0x0, SCMI_EVENT_SYSTEM_POWER_STATE_NOTIFIER = 0x0, @@ -698,6 +699,14 @@ struct scmi_sensor_trip_point_report { unsigned int trip_point_desc; }; +struct scmi_sensor_update_report { + ktime_t timestamp; + unsigned int agent_id; + unsigned int sensor_id; + unsigned int readings_count; + struct scmi_sensor_reading readings[]; +}; + struct scmi_reset_issued_report { ktime_t timestamp; unsigned int agent_id; -- cgit v1.2.3 From bc2dc4406c463174613047d8b7946e12c8808cda Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 21 Nov 2020 22:17:01 -0800 Subject: compiler-clang: remove version check for BPF Tracing bpftrace parses the kernel headers and uses Clang under the hood. Remove the version check when __BPF_TRACING__ is defined (as bpftrace does) so that this tool can continue to parse kernel headers, even with older clang sources. Fixes: commit 1f7a44f63e6c ("compiler-clang: add build check for clang 10.0.1") Reported-by: Chen Yu Reported-by: Jarkko Sakkinen Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Acked-by: Song Liu Acked-by: Nathan Chancellor Acked-by: Miguel Ojeda Link: https://lkml.kernel.org/r/20201104191052.390657-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index dd7233c48bf3..98cff1b4b088 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -8,8 +8,10 @@ + __clang_patchlevel__) #if CLANG_VERSION < 100001 +#ifndef __BPF_TRACING__ # error Sorry, your version of Clang is too old - please use 10.0.1 or newer. #endif +#endif /* Compiler specific definitions for Clang compiler */ -- cgit v1.2.3 From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 21 Nov 2020 22:17:05 -0800 Subject: mm: fix phys_to_target_node() and memory_add_physaddr_to_nid() exports The core-mm has a default __weak implementation of phys_to_target_node() to mirror the weak definition of memory_add_physaddr_to_nid(). That symbol is exported for modules. However, while the export in mm/memory_hotplug.c exported the symbol in the configuration cases of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=y ...and: CONFIG_NUMA_KEEP_MEMINFO=n CONFIG_MEMORY_HOTPLUG=y ...it failed to export the symbol in the case of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=n Not only is that broken, but Christoph points out that the kernel should not be exporting any __weak symbol, which means that memory_add_physaddr_to_nid() example that phys_to_target_node() copied is broken too. Rework the definition of phys_to_target_node() and memory_add_physaddr_to_nid() to not require weak symbols. Move to the common arch override design-pattern of an asm header defining a symbol to replace the default implementation. The only common header that all memory_add_physaddr_to_nid() producing architectures implement is asm/sparsemem.h. In fact, powerpc already defines its memory_add_physaddr_to_nid() helper in sparsemem.h. Double-down on that observation and define phys_to_target_node() where necessary in asm/sparsemem.h. An alternate consideration that was discarded was to put this override in asm/numa.h, but that entangles with the definition of MAX_NUMNODES relative to the inclusion of linux/nodemask.h, and requires powerpc to grow a new header. The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid now that the symbol is properly exported / stubbed in all combinations of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG. [dan.j.williams@intel.com: v4] Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com [dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning] Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation") Reported-by: Randy Dunlap Reported-by: Thomas Gleixner Reported-by: kernel test robot Reported-by: Christoph Hellwig Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Tested-by: Randy Dunlap Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Cc: Joao Martins Cc: Tony Luck Cc: Fenghua Yu Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Vishal Verma Cc: Stephen Rothwell Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 14 -------------- include/linux/numa.h | 30 +++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..551093b74596 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -extern int phys_to_target_node(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -static inline int phys_to_target_node(u64 start) -{ - return 0; -} -#endif - #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/numa.h b/include/linux/numa.h index 8cb33ccfb671..cb44cfe2b725 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -21,13 +21,41 @@ #endif #ifdef CONFIG_NUMA +#include +#include + /* Generic implementation available */ int numa_map_to_online_node(int node); -#else + +#ifndef memory_add_physaddr_to_nid +static inline int memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#ifndef phys_to_target_node +static inline int phys_to_target_node(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} #endif #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 21 Nov 2020 22:17:08 -0800 Subject: mm: fix readahead_page_batch for retry entries Both btrfs and fuse have reported faults caused by seeing a retry entry instead of the page they were looking for. This was caused by a missing check in the iterator. As can be seen in the below panic log, the accessing 0x402 causes a panic. In the xarray.h, 0x402 means RETRY_ENTRY. BUG: kernel NULL pointer dereference, address: 0000000000000402 CPU: 14 PID: 306003 Comm: as Not tainted 5.9.0-1-amd64 #1 Debian 5.9.1-1 Hardware name: Lenovo ThinkSystem SR665/7D2VCTO1WW, BIOS D8E106Q-1.01 05/30/2020 RIP: 0010:fuse_readahead+0x152/0x470 [fuse] Code: 41 8b 57 18 4c 8d 54 10 ff 4c 89 d6 48 8d 7c 24 10 e8 d2 e3 28 f9 48 85 c0 0f 84 fe 00 00 00 44 89 f2 49 89 04 d4 44 8d 72 01 <48> 8b 10 41 8b 4f 1c 48 c1 ea 10 83 e2 01 80 fa 01 19 d2 81 e2 01 RSP: 0018:ffffad99ceaebc50 EFLAGS: 00010246 RAX: 0000000000000402 RBX: 0000000000000001 RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff94c5af90bd98 RDI: ffffad99ceaebc60 RBP: ffff94ddc1749a00 R08: 0000000000000402 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000100 R12: ffff94de6c429ce0 R13: ffff94de6c4d3700 R14: 0000000000000001 R15: ffffad99ceaebd68 FS: 00007f228c5c7040(0000) GS:ffff94de8ed80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000402 CR3: 0000001dbd9b4000 CR4: 0000000000350ee0 Call Trace: read_pages+0x83/0x270 page_cache_readahead_unbounded+0x197/0x230 generic_file_buffered_read+0x57a/0xa20 new_sync_read+0x112/0x1a0 vfs_read+0xf8/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 042124cc64c3 ("mm: add new readahead_control API") Reported-by: David Sterba Reported-by: Wonhyuk Yang Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Link: https://lkml.kernel.org/r/20201103142852.8543-1-willy@infradead.org Link: https://lkml.kernel.org/r/20201103124349.16722-1-vvghjk1234@gmail.com Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1e19c1f9ec9..d5570deff400 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -906,6 +906,8 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, xas_set(&xas, rac->_index); rcu_read_lock(); xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { + if (xas_retry(&xas, page)) + continue; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; -- cgit v1.2.3 From e44cdff05145b84293e3f424daa17e4f3ce0109c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 19 Nov 2020 17:45:09 +0100 Subject: clk: samsung: Allow compile testing of Exynos, S3C64xx and S5Pv210 So far all Exynos, S3C64xx and S5Pv210 clock units were selected by respective SOC/ARCH Kconfig option. On a kernel built for selected SoCs, this allowed to build only limited set of matching clock drivers. However compile testing was not possible in such case as Makefile object depends on SOC/ARCH option. Add separate Kconfig options for each of them to be able to compile test. Link: https://lore.kernel.org/r/20201119164509.754851-1-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Signed-off-by: Sylwester Nawrocki --- include/linux/clk/samsung.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/samsung.h b/include/linux/clk/samsung.h index 79097e365f7f..38b774001712 100644 --- a/include/linux/clk/samsung.h +++ b/include/linux/clk/samsung.h @@ -10,7 +10,7 @@ struct device_node; -#ifdef CONFIG_ARCH_S3C64XX +#ifdef CONFIG_S3C64XX_COMMON_CLK void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, unsigned long xusbxti_f, bool s3c6400, void __iomem *base); @@ -19,7 +19,7 @@ static inline void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, unsigned long xusbxti_f, bool s3c6400, void __iomem *base) { } -#endif /* CONFIG_ARCH_S3C64XX */ +#endif /* CONFIG_S3C64XX_COMMON_CLK */ #ifdef CONFIG_S3C2410_COMMON_CLK void s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, -- cgit v1.2.3 From 769dda58d1f647a45270db2f02efe2e2de856709 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:02:10 +0100 Subject: irqstat: Get rid of nmi_count() and __IRQ_STAT() Nothing uses this anymore. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201113141733.005212732@linutronix.de --- include/linux/irq_cpustat.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h index 6e8895cd4d92..78fb2de3ea4d 100644 --- a/include/linux/irq_cpustat.h +++ b/include/linux/irq_cpustat.h @@ -19,10 +19,6 @@ #ifndef __ARCH_IRQ_STAT DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); /* defined in asm/hardirq.h */ -#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat.member, cpu)) #endif -/* arch dependent irq_stat fields */ -#define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386 */ - #endif /* __irq_cpustat_h */ -- cgit v1.2.3 From e091bc90cd2d65f48e4688faead2911558d177d7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:02:16 +0100 Subject: irqstat: Move declaration into asm-generic/hardirq.h Move the declaration of the irq_cpustat per cpu variable to asm-generic/hardirq.h and remove the now empty linux/irq_cpustat.h header. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201113141733.737377332@linutronix.de --- include/linux/irq_cpustat.h | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 include/linux/irq_cpustat.h (limited to 'include/linux') diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h deleted file mode 100644 index 78fb2de3ea4d..000000000000 --- a/include/linux/irq_cpustat.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __irq_cpustat_h -#define __irq_cpustat_h - -/* - * Contains default mappings for irq_cpustat_t, used by almost every - * architecture. Some arch (like s390) have per cpu hardware pages and - * they define their own mappings for irq_stat. - * - * Keith Owens July 2000. - */ - - -/* - * Simple wrappers reducing source bloat. Define all irq_stat fields - * here, even ones that are arch dependent. That way we get common - * definitions instead of differing sets for each arch. - */ - -#ifndef __ARCH_IRQ_STAT -DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); /* defined in asm/hardirq.h */ -#endif - -#endif /* __irq_cpustat_h */ -- cgit v1.2.3 From 15115830c88751ba83068aa37da996602ddc6a61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2020 15:02:17 +0100 Subject: preempt: Cleanup the macro maze a bit Make the macro maze consistent and prepare it for adding the RT variant for BH accounting. - Use nmi_count() for the NMI portion of preempt count - Introduce in_hardirq() to make the naming consistent and non-ambiguos - Use the macros to create combined checks (e.g. in_task()) so the softirq representation for RT just falls into place. - Update comments and move the deprecated macros aside Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20201113141733.864469886@linutronix.de --- include/linux/preempt.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7d9c1c0e149c..7547857516d5 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -77,31 +77,33 @@ /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include +#define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ - | NMI_MASK)) +#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* - * Are we doing bottom half or hardware interrupt processing? + * Macros to retrieve the current execution context: * - * in_irq() - We're in (hard) IRQ context + * in_nmi() - We're in NMI context + * in_hardirq() - We're in hard IRQ context + * in_serving_softirq() - We're in softirq context + * in_task() - We're in task context + */ +#define in_nmi() (nmi_count()) +#define in_hardirq() (hardirq_count()) +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) +#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) + +/* + * The following macros are deprecated and should not be used in new code: + * in_irq() - Obsolete version of in_hardirq() * in_softirq() - We have BH disabled, or are processing softirqs * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled - * in_serving_softirq() - We're in softirq context - * in_nmi() - We're in NMI context - * in_task() - We're in task context - * - * Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really - * should not be used in new code. */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_nmi() (preempt_count() & NMI_MASK) -#define in_task() (!(preempt_count() & \ - (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) /* * The preempt_count offset after preempt_disable(); -- cgit v1.2.3 From cb4789b0d19ff231ce9f73376a023341300aed96 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 6 Nov 2020 16:50:47 +0100 Subject: iommu/ioasid: Add ioasid references Let IOASID users take references to existing ioasids with ioasid_get(). ioasid_put() drops a reference and only frees the ioasid when its reference number is zero. It returns true if the ioasid was freed. For drivers that don't call ioasid_get(), ioasid_put() is the same as ioasid_free(). Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20201106155048.997886-2-jean-philippe@linaro.org Signed-off-by: Will Deacon --- include/linux/ioasid.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 6f000d7a0ddc..e9dacd4b9f6b 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,7 +34,8 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_free(ioasid_t ioasid); +void ioasid_get(ioasid_t ioasid); +bool ioasid_put(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); @@ -48,10 +49,15 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_free(ioasid_t ioasid) +static inline void ioasid_get(ioasid_t ioasid) { } +static inline bool ioasid_put(ioasid_t ioasid) +{ + return false; +} + static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) { -- cgit v1.2.3 From b713c195d59332277a31a59c91f755e53b5b302b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 5 Sep 2020 11:13:35 -0600 Subject: net: provide __sys_shutdown_sock() that takes a socket No functional changes in this patch, needed to provide io_uring support for shutdown(2). Cc: netdev@vger.kernel.org Cc: David S. Miller Acked-by: Jakub Kicinski Signed-off-by: Jens Axboe --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index e9cb30d8cbfb..385894b4a8bb 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); extern struct ns_common *get_net_ns(struct ns_common *ns); -- cgit v1.2.3 From 23acdc76f1798b090bb9dcc90671cd29d929834e Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 12 Nov 2020 18:53:34 -0800 Subject: signal: clear non-uapi flag bits when passing/returning sa_flags Previously we were not clearing non-uapi flag bits in sigaction.sa_flags when storing the userspace-provided sa_flags or when returning them via oldact. Start doing so. This allows userspace to detect missing support for flag bits and allows the kernel to use non-uapi bits internally, as we are already doing in arch/x86 for two flag bits. Now that this change is in place, we no longer need the code in arch/x86 that was hiding these bits from userspace, so remove it. This is technically a userspace-visible behavior change for sigaction, as the unknown bits returned via oldact.sa_flags are no longer set. However, we are free to define the behavior for unknown bits exactly because their behavior is currently undefined, so for now we can define the meaning of each of them to be "clear the bit in oldact.sa_flags unless the bit becomes known in the future". Furthermore, this behavior is consistent with OpenBSD [1], illumos [2] and XNU [3] (FreeBSD [4] and NetBSD [5] fail the syscall if unknown bits are set). So there is some precedent for this behavior in other kernels, and in particular in XNU, which is probably the most popular kernel among those that I looked at, which means that this change is less likely to be a compatibility issue. Link: [1] https://github.com/openbsd/src/blob/f634a6a4b5bf832e9c1de77f7894ae2625e74484/sys/kern/kern_sig.c#L278 Link: [2] https://github.com/illumos/illumos-gate/blob/76f19f5fdc974fe5be5c82a556e43a4df93f1de1/usr/src/uts/common/syscall/sigaction.c#L86 Link: [3] https://github.com/apple/darwin-xnu/blob/a449c6a3b8014d9406c2ddbdc81795da24aa7443/bsd/kern/kern_sig.c#L480 Link: [4] https://github.com/freebsd/freebsd/blob/eded70c37057857c6e23fae51f86b8f8f43cd2d0/sys/kern/kern_sig.c#L699 Link: [5] https://github.com/NetBSD/src/blob/3365779becdcedfca206091a645a0e8e22b2946e/sys/kern/sys_sig.c#L473 Signed-off-by: Peter Collingbourne Reviewed-by: Dave Martin Acked-by: "Eric W. Biederman" Link: https://linux-review.googlesource.com/id/I35aab6f5be932505d90f3b3450c083b4db1eca86 Link: https://lkml.kernel.org/r/878dbcb5f47bc9b11881c81f745c0bef5c23f97f.1605235762.git.pcc@google.com Signed-off-by: Eric W. Biederman --- include/linux/signal_types.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index f8a90ae9c6ec..a7887ad84d36 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -68,4 +68,16 @@ struct ksignal { int sig; }; +#ifndef __ARCH_UAPI_SA_FLAGS +#ifdef SA_RESTORER +#define __ARCH_UAPI_SA_FLAGS SA_RESTORER +#else +#define __ARCH_UAPI_SA_FLAGS 0 +#endif +#endif + +#define UAPI_SA_FLAGS \ + (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART | \ + SA_NODEFER | SA_RESETHAND | __ARCH_UAPI_SA_FLAGS) + #endif /* _LINUX_SIGNAL_TYPES_H */ -- cgit v1.2.3 From 6ac05e832a9e96f9b1c42a8917cdd317d7b6c8fa Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 20 Nov 2020 12:33:45 -0800 Subject: signal: define the SA_EXPOSE_TAGBITS bit in sa_flags Architectures that support address tagging, such as arm64, may want to expose fault address tag bits to the signal handler to help diagnose memory errors. However, these bits have not been previously set, and their presence may confuse unaware user applications. Therefore, introduce a SA_EXPOSE_TAGBITS flag bit in sa_flags that a signal handler may use to explicitly request that the bits are set. The generic signal handler APIs expect to receive tagged addresses. Architectures may specify how to untag addresses in the case where SA_EXPOSE_TAGBITS is clear by defining the arch_untagged_si_addr function. Signed-off-by: Peter Collingbourne Acked-by: "Eric W. Biederman" Link: https://linux-review.googlesource.com/id/I16dd0ed2081f091fce97be0190cb8caa874c26cb Link: https://lkml.kernel.org/r/13cf24d00ebdd8e1f55caf1821c7c29d54100191.1605904350.git.pcc@google.com Signed-off-by: Eric W. Biederman --- include/linux/signal.h | 14 ++++++++++++++ include/linux/signal_types.h | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index b256f9c65661..205526c4003a 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -469,4 +469,18 @@ struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); #endif +#ifndef arch_untagged_si_addr +/* + * Given a fault address and a signal and si_code which correspond to the + * _sigfault union member, returns the address that must appear in si_addr if + * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags. + */ +static inline void __user *arch_untagged_si_addr(void __user *addr, + unsigned long sig, + unsigned long si_code) +{ + return addr; +} +#endif + #endif /* _LINUX_SIGNAL_H */ diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index a7887ad84d36..68e06c75c5b2 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -78,6 +78,6 @@ struct ksignal { #define UAPI_SA_FLAGS \ (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART | \ - SA_NODEFER | SA_RESETHAND | __ARCH_UAPI_SA_FLAGS) + SA_NODEFER | SA_RESETHAND | SA_EXPOSE_TAGBITS | __ARCH_UAPI_SA_FLAGS) #endif /* _LINUX_SIGNAL_TYPES_H */ -- cgit v1.2.3 From 4e1d9a737d00f2cc811dc5654f82c92c7d80e98c Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 19 Nov 2020 08:25:39 +0100 Subject: PM: sleep: Add dev_wakeup_path() helper Add dev_wakeup_path() helper to avoid to spread dev->power.wakeup_path test in drivers. Signed-off-by: Patrice Chotard Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index aa3da6611533..196a157456aa 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -84,6 +84,11 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && !!dev->power.wakeup; } +static inline bool device_wakeup_path(struct device *dev) +{ + return dev->power.wakeup_path; +} + static inline void device_set_wakeup_path(struct device *dev) { dev->power.wakeup_path = true; @@ -174,6 +179,11 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && dev->power.should_wakeup; } +static inline bool device_wakeup_path(struct device *dev) +{ + return false; +} + static inline void device_set_wakeup_path(struct device *dev) {} static inline void __pm_stay_awake(struct wakeup_source *ws) {} -- cgit v1.2.3 From a94ef811f7c3748736b85db0406da8e4ea391ac6 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Thu, 19 Nov 2020 09:43:25 -0700 Subject: PM: domains: replace -ENOTSUPP with -EOPNOTSUPP While submitting a patch to add next_wakeup, checkpatch reported this - WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP + return -ENOTSUPP; Address the above warning in other functions in pm_domain.h. Reviewed-by: Ulf Hansson Signed-off-by: Lina Iyer Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ad0ec481416..5fd20d117cbe 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -255,24 +255,24 @@ static inline int pm_genpd_init(struct generic_pm_domain *genpd, } static inline int pm_genpd_remove(struct generic_pm_domain *genpd) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int dev_pm_genpd_remove_notifier(struct device *dev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) @@ -325,13 +325,13 @@ struct device *genpd_dev_pm_attach_by_name(struct device *dev, static inline int of_genpd_add_provider_simple(struct device_node *np, struct generic_pm_domain *genpd) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline void of_genpd_del_provider(struct device_node *np) {} @@ -387,7 +387,7 @@ static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev, static inline struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ -- cgit v1.2.3 From 8eb621698fd4c49703d512fe437d84ab822bc59e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Sep 2020 11:12:03 +0100 Subject: keys: Provide the original description to the key preparser Provide the proposed description (add key) or the original description (update/instantiate key) when preparsing a key so that the key type can validate it against the data. This is important for rxrpc server keys as we need to check that they have the right amount of key material present - and it's better to do that when the key is loaded rather than deep in trying to process a response packet. Signed-off-by: David Howells cc: Jarkko Sakkinen cc: keyrings@vger.kernel.org --- include/linux/key-type.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 2ab2d6d6aeab..7d985a1dfe4a 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -29,6 +29,7 @@ struct kernel_pkey_params; * clear the contents. */ struct key_preparsed_payload { + const char *orig_description; /* Actual or proposed description (maybe NULL) */ char *description; /* Proposed key description (or NULL) */ union key_payload payload; /* Proposed payload */ const void *data; /* Raw data */ -- cgit v1.2.3 From e7095c35abfc5a5d566941a87434c0fd5ffb570f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 19 Nov 2020 19:10:49 +0000 Subject: regulator: core: add of_match_full_name boolean flag During regulators registration, if .of_match and .regulators_node are defined as non-null strings in struct regulator_desc the core searches the DT subtree rooted at .regulators_node trying to match, at first, .of_match against the 'regulator-compatible' property and, then, falling back to use the name of the node itself to determine a good match. Property 'regulator-compatible', though, is now deprecated and falling back to match against the node name, works fine only as long as the involved nodes are named in an unique way across the searched subtree; if that's not the case, like when using @ style naming for properties indexed via 'reg' property (as advised by the standard), the above matching mechanism based on the simple common name will lead to multiple matches and the only viable alternative would be to properly define the now deprecated 'regulator-compatible' as the node full name, i.e. @. In order to address this case without using such deprecated binding, define a new boolean flag .of_match_full_name in struct regulator_desc to force the core to match against the node full-name instead of the plain name. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20201119191051.46363-4-cristian.marussi@arm.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 11cade73726c..d7c77ee370f3 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -223,6 +223,8 @@ enum regulator_type { * @name: Identifying name for the regulator. * @supply_name: Identifying the regulator supply * @of_match: Name used to identify regulator in DT. + * @of_match_full_name: A flag to indicate that the of_match string, if + * present, should be matched against the node full_name. * @regulators_node: Name of node containing regulator definitions in DT. * @of_parse_cb: Optional callback called only if of_match is present. * Will be called for each regulator parsed from DT, during @@ -314,6 +316,7 @@ struct regulator_desc { const char *name; const char *supply_name; const char *of_match; + bool of_match_full_name; const char *regulators_node; int (*of_parse_cb)(struct device_node *, const struct regulator_desc *, -- cgit v1.2.3 From 076d38b88c4146fd5506933d440b881741b6ab60 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Fri, 20 Nov 2020 09:41:04 +0100 Subject: net: ptp: introduce common defines for PTP message types Using PTP wide defines will obsolete different driver internal defines and uses of magic numbers. Signed-off-by: Christian Eggers Cc: Kurt Kanzenbach Reviewed-by: Vladimir Oltean Reviewed-by: Richard Cochran Signed-off-by: Jakub Kicinski --- include/linux/ptp_classify.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index c6487b7ab026..ae04968a3a47 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -31,6 +31,11 @@ #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN) #define PTP_CLASS_L4 (PTP_CLASS_IPV4 | PTP_CLASS_IPV6) +#define PTP_MSGTYPE_SYNC 0x0 +#define PTP_MSGTYPE_DELAY_REQ 0x1 +#define PTP_MSGTYPE_PDELAY_REQ 0x2 +#define PTP_MSGTYPE_PDELAY_RESP 0x3 + #define PTP_EV_PORT 319 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ @@ -140,7 +145,7 @@ static inline u8 ptp_get_msgtype(const struct ptp_header *hdr, /* The return is meaningless. The stub function would not be * executed since no available header from ptp_parse_header. */ - return 0; + return PTP_MSGTYPE_SYNC; } #endif #endif /* _PTP_CLASSIFY_H_ */ -- cgit v1.2.3 From 3df98d79215ace13d1e91ddfc5a67a0f5acbd83f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sun, 27 Sep 2020 22:38:26 -0400 Subject: lsm,selinux: pass flowi_common instead of flowi to the LSM hooks As pointed out by Herbert in a recent related patch, the LSM hooks do not have the necessary address family information to use the flowi struct safely. As none of the LSMs currently use any of the protocol specific flowi information, replace the flowi pointers with pointers to the address family independent flowi_common struct. Reported-by: Herbert Xu Acked-by: James Morris Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++-- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 23 ++++++++++++++--------- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 32a940117e7a..f70984c8318b 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -311,7 +311,7 @@ LSM_HOOK(int, 0, secmark_relabel_packet, u32 secid) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, - struct flowi *fl) + struct flowi_common *flic) LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) @@ -351,7 +351,7 @@ LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x) LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, const struct flowi_common *flic) LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, int ckall) #endif /* CONFIG_SECURITY_NETWORK_XFRM */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c503f7ab8afb..a19adef1f088 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1105,7 +1105,7 @@ * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. + * @flic contains the flowi_common struct to check for a match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. diff --git a/include/linux/security.h b/include/linux/security.h index bc2725491560..469273e8b46d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -167,7 +167,7 @@ struct sk_buff; struct sock; struct sockaddr; struct socket; -struct flowi; +struct flowi_common; struct dst_entry; struct xfrm_selector; struct xfrm_policy; @@ -1355,8 +1355,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); -void security_sk_classify_flow(struct sock *sk, struct flowi *fl); -void security_req_classify_flow(const struct request_sock *req, struct flowi *fl); +void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic); +void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic); void security_sock_graft(struct sock*sk, struct socket *parent); int security_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1507,11 +1508,13 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +static inline void security_sk_classify_flow(struct sock *sk, + struct flowi_common *flic) { } -static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +static inline void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic) { } @@ -1638,9 +1641,9 @@ void security_xfrm_state_free(struct xfrm_state *x); int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const struct flowi_common *flic); int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid); -void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); +void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic); #else /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -1692,7 +1695,8 @@ static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_s } static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, + const struct flowi_common *flic) { return 1; } @@ -1702,7 +1706,8 @@ static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) return 0; } -static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +static inline void security_skb_classify_flow(struct sk_buff *skb, + struct flowi_common *flic) { } -- cgit v1.2.3 From cc69837fcaf467426ca19e5790085c26146a2300 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Nov 2020 14:50:52 -0800 Subject: net: don't include ethtool.h from netdevice.h linux/netdevice.h is included in very many places, touching any of its dependecies causes large incremental builds. Drop the linux/ethtool.h include, linux/netdevice.h just needs a forward declaration of struct ethtool_ops. Fix all the places which made use of this implicit include. Acked-by: Johannes Berg Acked-by: Shannon Nelson Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20201120225052.1427503-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- include/linux/qed/qed_if.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03433a4c929e..0049e8fe4905 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -34,7 +34,6 @@ #include #include -#include #include #ifdef CONFIG_DCB #include @@ -51,6 +50,7 @@ struct netpoll_info; struct device; +struct ethtool_ops; struct phy_device; struct dsa_port; struct ip_tunnel_parm; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 57fb295ea41a..68d17a4fbf20 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -7,6 +7,7 @@ #ifndef _QED_IF_H #define _QED_IF_H +#include #include #include #include -- cgit v1.2.3 From d549699048b4b5c22dd710455bcdb76966e55aa3 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sat, 21 Nov 2020 08:28:17 +0200 Subject: net/packet: fix packet receive on L3 devices without visible hard header In the patchset merged by commit b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") L3 devices which did not have header_ops were given one for the purpose of protocol parsing on af_packet transmit path. That change made af_packet receive path regard these devices as having a visible L3 header and therefore aligned incoming skb->data to point to the skb's mac_header. Some devices, such as ipip, xfrmi, and others, do not reset their mac_header prior to ingress and therefore their incoming packets became malformed. Ideally these devices would reset their mac headers, or af_packet would be able to rely on dev->hard_header_len being 0 for such cases, but it seems this is not the case. Fix by changing af_packet RX ll visibility criteria to include the existence of a '.create()' header operation, which is used when creating a device hard header - via dev_hard_header() - by upper layers, and does not exist in these L3 devices. As this predicate may be useful in other situations, add it as a common dev_has_header() helper in netdevice.h. Fixes: b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") Signed-off-by: Eyal Birger Acked-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201121062817.3178900-1-eyal.birger@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..fa275a054f46 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3137,6 +3137,11 @@ static inline bool dev_validate_header(const struct net_device *dev, return false; } +static inline bool dev_has_header(const struct net_device *dev) +{ + return dev->header_ops && dev->header_ops->create; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len, int size); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); -- cgit v1.2.3 From b7cab9be7c16128a0de21ed7ae67211838813437 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Nov 2020 23:23:58 +0800 Subject: soundwire: SDCA: detect sdca_cascade interrupt The SoundWire 1.2 specification defines an "SDCA cascade" bit which handles a logical OR of all SDCA interrupt sources (up to 30 defined). Due to limitations of the addressing space, this bit is located in the SDW_DP0_INT register when DP0 is used, or alternatively in the DP0_SDCA_Support_INTSTAT register when DP0 is not used. To allow for both cases to be handled, this bit will be checked in the main device-level interrupt handling code. This will result in the register being read twice if DP0 is enabled, but it's not clear how to optimize this case. It's also more logical to deal with this interrupt at the device than the port level, this bit is really not DP0 specific and its location in the DP0_INTSTAT bit is only due to the lack of free space in SCP_INTSTAT_1. The SDCA_Cascade bit cannot be masked or cleared, so the interrupt handling only forwards the detection to the Slave driver, which will deal with reading the relevant SDCA status bits and clearing them. The bus driver only signals the detection. The communication with the Slave driver is based on the same interrupt callback, with only an extension to provide the status of the sdca_cascade bit. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20201104152358.9518-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 41cc1192f9aa..f0b01b728640 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -359,6 +359,7 @@ struct sdw_dpn_prop { * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification + * @is_sdca: the Slave supports the SDCA specification */ struct sdw_slave_prop { u32 mipi_revision; @@ -382,6 +383,7 @@ struct sdw_slave_prop { struct sdw_dpn_prop *sink_dpn_prop; u8 scp_int1_mask; u32 quirks; + bool is_sdca; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) @@ -479,10 +481,12 @@ struct sdw_slave_id { /** * struct sdw_slave_intr_status - Slave interrupt status + * @sdca_cascade: set if the Slave device reports an SDCA interrupt * @control_port: control port status * @port: data port status */ struct sdw_slave_intr_status { + bool sdca_cascade; u8 control_port; u8 port[15]; }; -- cgit v1.2.3 From 74d862b682f51e45d25b95b1ecf212428a4967b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:42 +0100 Subject: sched: Make migrate_disable/enable() independent of RT Now that the scheduler can deal with migrate disable properly, there is no real compelling reason to make it only available for RT. There are quite some code pathes which needlessly disable preemption in order to prevent migration and some constructs like kmap_atomic() enforce it implicitly. Making it available independent of RT allows to provide a preemptible variant of kmap_atomic() and makes the code more consistent in general. Signed-off-by: Thomas Gleixner Grudgingly-Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.269943012@linutronix.de --- include/linux/kernel.h | 21 ++++++++++++++------- include/linux/preempt.h | 38 +++----------------------------------- include/linux/sched.h | 2 +- 3 files changed, 18 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f05e9128201..665837f9a831 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -204,6 +204,7 @@ extern int _cond_resched(void); extern void ___might_sleep(const char *file, int line, int preempt_offset); extern void __might_sleep(const char *file, int line, int preempt_offset); extern void __cant_sleep(const char *file, int line, int preempt_offset); +extern void __cant_migrate(const char *file, int line); /** * might_sleep - annotation for functions that can sleep @@ -227,6 +228,18 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); # define cant_sleep() \ do { __cant_sleep(__FILE__, __LINE__, 0); } while (0) # define sched_annotate_sleep() (current->task_state_change = 0) + +/** + * cant_migrate - annotation for functions that cannot migrate + * + * Will print a stack trace if executed in code which is migratable + */ +# define cant_migrate() \ + do { \ + if (IS_ENABLED(CONFIG_SMP)) \ + __cant_migrate(__FILE__, __LINE__); \ + } while (0) + /** * non_block_start - annotate the start of section where sleeping is prohibited * @@ -251,6 +264,7 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) +# define cant_migrate() do { } while (0) # define sched_annotate_sleep() do { } while (0) # define non_block_start() do { } while (0) # define non_block_end() do { } while (0) @@ -258,13 +272,6 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -#ifndef CONFIG_PREEMPT_RT -# define cant_migrate() cant_sleep() -#else - /* Placeholder for now */ -# define cant_migrate() do { } while (0) -#endif - /** * abs - return absolute value of an argument * @x: the value. If it is unsigned type, it is converted to signed type first. diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 8b43922e65df..6df63cbe8bb0 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -322,7 +322,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #endif -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_SMP /* * Migrate-Disable and why it is undesired. @@ -382,43 +382,11 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, extern void migrate_disable(void); extern void migrate_enable(void); -#elif defined(CONFIG_PREEMPT_RT) +#else static inline void migrate_disable(void) { } static inline void migrate_enable(void) { } -#else /* !CONFIG_PREEMPT_RT */ - -/** - * migrate_disable - Prevent migration of the current task - * - * Maps to preempt_disable() which also disables preemption. Use - * migrate_disable() to annotate that the intent is to prevent migration, - * but not necessarily preemption. - * - * Can be invoked nested like preempt_disable() and needs the corresponding - * number of migrate_enable() invocations. - */ -static __always_inline void migrate_disable(void) -{ - preempt_disable(); -} - -/** - * migrate_enable - Allow migration of the current task - * - * Counterpart to migrate_disable(). - * - * As migrate_disable() can be invoked nested, only the outermost invocation - * reenables migration. - * - * Currently mapped to preempt_enable(). - */ -static __always_inline void migrate_enable(void) -{ - preempt_enable(); -} - -#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */ +#endif /* CONFIG_SMP */ #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3af9d52fe093..a33f35f68060 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -715,7 +715,7 @@ struct task_struct { const cpumask_t *cpus_ptr; cpumask_t cpus_mask; void *migration_pending; -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_SMP unsigned short migration_disabled; #endif unsigned short migration_flags; -- cgit v1.2.3 From 5fbda3ecd14a5343644979c98d6eb65b7e7de9d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:43 +0100 Subject: sched: highmem: Store local kmaps in task struct Instead of storing the map per CPU provide and use per task storage. That prepares for local kmaps which are preemptible. The context switch code is preparatory and not yet in use because kmap_atomic() runs with preemption disabled. Will be made usable in the next step. The context switch logic is safe even when an interrupt happens after clearing or before restoring the kmaps. The kmap index in task struct is not modified so any nesting kmap in an interrupt will use unused indices and on return the counter is the same as before. Also add an assert into the return to user space code. Going back to user space with an active kmap local is a nono. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.372935758@linutronix.de --- include/linux/highmem-internal.h | 10 ++++++++++ include/linux/sched.h | 9 +++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 6ceed907b14e..c5a22177db85 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -9,6 +9,16 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot); void kunmap_local_indexed(void *vaddr); +void kmap_local_fork(struct task_struct *tsk); +void __kmap_local_sched_out(void); +void __kmap_local_sched_in(void); +static inline void kmap_assert_nomap(void) +{ + DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx); +} +#else +static inline void kmap_local_fork(struct task_struct *tsk) { } +static inline void kmap_assert_nomap(void) { } #endif #ifdef CONFIG_HIGHMEM diff --git a/include/linux/sched.h b/include/linux/sched.h index a33f35f68060..8946911cee9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -629,6 +630,13 @@ struct wake_q_node { struct wake_q_node *next; }; +struct kmap_ctrl { +#ifdef CONFIG_KMAP_LOCAL + int idx; + pte_t pteval[KM_MAX_IDX]; +#endif +}; + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -1294,6 +1302,7 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif + struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif -- cgit v1.2.3 From f3ba3c710ac5a30cd058615a9eb62d2ad95bb782 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:44 +0100 Subject: mm/highmem: Provide kmap_local* Now that the kmap atomic index is stored in task struct provide a preemptible variant. On context switch the maps of an outgoing task are removed and the map of the incoming task are restored. That's obviously slow, but highmem is slow anyway. The kmap_local.*() functions can be invoked from both preemptible and atomic context. kmap local sections disable migration to keep the resulting virtual mapping address correct, but disable neither pagefaults nor preemption. A wholesale conversion of kmap_atomic to be fully preemptible is not possible because some of the usage sites might rely on the preemption disable for serialization or on the implicit pagefault disable. Needs to be done on a case by case basis. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.468533059@linutronix.de --- include/linux/highmem-internal.h | 48 ++++++++++++++++++++++++++++++++++++++++ include/linux/highmem.h | 43 +++++++++++++++++++++-------------- 2 files changed, 75 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index c5a22177db85..1bbe96dc8be6 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -68,6 +68,26 @@ static inline void kmap_flush_unused(void) __kmap_flush_unused(); } +static inline void *kmap_local_page(struct page *page) +{ + return __kmap_local_page_prot(page, kmap_prot); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return __kmap_local_page_prot(page, prot); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return __kmap_local_pfn_prot(pfn, kmap_prot); +} + +static inline void __kunmap_local(void *vaddr) +{ + kunmap_local_indexed(vaddr); +} + static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { preempt_disable(); @@ -140,6 +160,28 @@ static inline void kunmap(struct page *page) #endif } +static inline void *kmap_local_page(struct page *page) +{ + return page_address(page); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return kmap_local_page(page); +} + +static inline void *kmap_local_pfn(unsigned long pfn) +{ + return kmap_local_page(pfn_to_page(pfn)); +} + +static inline void __kunmap_local(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif +} + static inline void *kmap_atomic(struct page *page) { preempt_disable(); @@ -181,4 +223,10 @@ do { \ __kunmap_atomic(__addr); \ } while (0) +#define kunmap_local(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_local(__addr); \ +} while (0) + #endif diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7d098bd621f6..f597830f26b4 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -60,24 +60,22 @@ static inline struct page *kmap_to_page(void *addr); static inline void kmap_flush_unused(void); /** - * kmap_atomic - Atomically map a page for temporary usage + * kmap_local_page - Map a page for temporary usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * - * Side effect: On return pagefaults and preemption are disabled. - * * Can be invoked from any context. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation: * - * addr1 = kmap_atomic(page1); - * addr2 = kmap_atomic(page2); + * addr1 = kmap_local_page(page1); + * addr2 = kmap_local_page(page2); * ... - * kunmap_atomic(addr2); - * kunmap_atomic(addr1); + * kunmap_local(addr2); + * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * @@ -88,10 +86,26 @@ static inline void kmap_flush_unused(void); * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * - * While it is significantly faster than kmap() it comes with restrictions - * about the pointer validity and the side effects of disabling page faults - * and preemption. Use it only when absolutely necessary, e.g. from non - * preemptible contexts. + * While it is significantly faster than kmap() for the higmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. + * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_page() can rely on this side effect. + */ +static inline void *kmap_local_page(struct page *page); + +/** + * kmap_atomic - Atomically map a page for temporary usage - Deprecated! + * @page: Pointer to the page to be mapped + * + * Returns: The virtual address of the mapping + * + * Effectively a wrapper around kmap_local_page() which disables pagefaults + * and preemption. + * + * Do not use in new code. Use kmap_local_page() instead. */ static inline void *kmap_atomic(struct page *page); @@ -101,12 +115,9 @@ static inline void *kmap_atomic(struct page *page); * * Counterpart to kmap_atomic(). * - * Undoes the side effects of kmap_atomic(), i.e. reenabling pagefaults and + * Effectively a wrapper around kunmap_local() which additionally undoes + * the side effects of kmap_atomic(), i.e. reenabling pagefaults and * preemption. - * - * Other than that a NOOP for CONFIG_HIGHMEM=n and for mappings of pages - * in the low memory area. For real highmen pages the mapping which was - * established with kmap_atomic() is destroyed. */ /* Highmem related interfaces for management code */ -- cgit v1.2.3 From e66f6e095486f0210fcf3c5eb3ecf13fa348be4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2020 20:48:45 +0100 Subject: io-mapping: Provide iomap_local variant Similar to kmap local provide a iomap local variant which only disables migration, but neither disables pagefaults nor preemption. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201118204007.561220818@linutronix.de --- include/linux/io-mapping.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 60e7c83e4904..c093e81310a9 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -82,6 +82,21 @@ io_mapping_unmap_atomic(void __iomem *vaddr) preempt_enable(); } +static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) +{ + resource_size_t phys_addr; + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); +} + +static inline void io_mapping_unmap_local(void __iomem *vaddr) +{ + kunmap_local_indexed((void __force *)vaddr); +} + static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset, @@ -101,7 +116,7 @@ io_mapping_unmap(void __iomem *vaddr) iounmap(vaddr); } -#else +#else /* HAVE_ATOMIC_IOMAP */ #include @@ -166,7 +181,18 @@ io_mapping_unmap_atomic(void __iomem *vaddr) preempt_enable(); } -#endif /* HAVE_ATOMIC_IOMAP */ +static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) +{ + return io_mapping_map_wc(mapping, offset, PAGE_SIZE); +} + +static inline void io_mapping_unmap_local(void __iomem *vaddr) +{ + io_mapping_unmap(vaddr); +} + +#endif /* !HAVE_ATOMIC_IOMAP */ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, -- cgit v1.2.3 From acfdd18591eaac25446e976a0c0d190f8b3dbfb1 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Mon, 23 Nov 2020 21:52:41 -0800 Subject: firmware: xilinx: Use hash-table for api feature check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently array of fix length PM_API_MAX is used to cache the pm_api version (valid or invalid). However ATF based PM APIs values are much higher then PM_API_MAX. So to include ATF based PM APIs also, use hash-table to store the pm_api version status. Signed-off-by: Amit Sunil Dhamne Reported-by: Arnd Bergmann  Signed-off-by: Ravi Patel Signed-off-by: Rajan Vaja Reviewed-by: Arnd Bergmann Tested-by: Michal Simek Fixes: f3217d6f2f7a ("firmware: xilinx: fix out-of-bounds access") Cc: stable Link: https://lore.kernel.org/r/1606197161-25976-1-git-send-email-rajan.vaja@xilinx.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5968df82b991..41a1bab98b7e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -50,10 +50,6 @@ #define ZYNQMP_PM_CAPABILITY_WAKEUP 0x4U #define ZYNQMP_PM_CAPABILITY_UNUSABLE 0x8U -/* Feature check status */ -#define PM_FEATURE_INVALID -1 -#define PM_FEATURE_UNCHECKED 0 - /* * Firmware FPGA Manager flags * XILINX_ZYNQMP_PM_FPGA_FULL: FPGA full reconfiguration -- cgit v1.2.3 From 7a9f50a05843fee8366bd3a65addbebaa7cf7f07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2020 11:51:29 +0200 Subject: irq_work: Cleanup Get rid of the __call_single_node union and clean up the API a little to avoid external code relying on the structure layout as much. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/irq_work.h | 33 +++++++++++++++++++++------------ include/linux/irqflags.h | 4 ++-- 2 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 30823780c192..ec2a47a81e42 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -14,28 +14,37 @@ */ struct irq_work { - union { - struct __call_single_node node; - struct { - struct llist_node llnode; - atomic_t flags; - }; - }; + struct __call_single_node node; void (*func)(struct irq_work *); }; +#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \ + .node = { .u_flags = (_flags), }, \ + .func = (_func), \ +} + +#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0) +#define IRQ_WORK_INIT_LAZY(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_LAZY) +#define IRQ_WORK_INIT_HARD(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_HARD_IRQ) + +#define DEFINE_IRQ_WORK(name, _f) \ + struct irq_work name = IRQ_WORK_INIT(_f) + static inline void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) { - atomic_set(&work->flags, 0); - work->func = func; + *work = IRQ_WORK_INIT(func); } -#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { \ - .flags = ATOMIC_INIT(0), \ - .func = (_f) \ +static inline bool irq_work_is_pending(struct irq_work *work) +{ + return atomic_read(&work->node.a_flags) & IRQ_WORK_PENDING; } +static inline bool irq_work_is_busy(struct irq_work *work) +{ + return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY; +} bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 3ed4e8771b64..fef2d43a7a1d 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -109,12 +109,12 @@ do { \ # define lockdep_irq_work_enter(__work) \ do { \ - if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ current->irq_config = 1; \ } while (0) # define lockdep_irq_work_exit(__work) \ do { \ - if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ current->irq_config = 0; \ } while (0) -- cgit v1.2.3 From 545b8c8df41f9ecbaf806332d4095bc4bc7c14e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2020 11:29:31 +0200 Subject: smp: Cleanup smp_call_function*() Get rid of the __call_single_node union and cleanup the API a little to avoid external code relying on the structure layout as much. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/smp.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 9f13966d3d92..70c6f6284dcf 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -21,24 +21,23 @@ typedef bool (*smp_cond_func_t)(int cpu, void *info); * structure shares (partial) layout with struct irq_work */ struct __call_single_data { - union { - struct __call_single_node node; - struct { - struct llist_node llist; - unsigned int flags; -#ifdef CONFIG_64BIT - u16 src, dst; -#endif - }; - }; + struct __call_single_node node; smp_call_func_t func; void *info; }; +#define CSD_INIT(_func, _info) \ + (struct __call_single_data){ .func = (_func), .info = (_info), } + /* Use __aligned() to avoid to use 2 cache lines for 1 csd */ typedef struct __call_single_data call_single_data_t __aligned(sizeof(struct __call_single_data)); +#define INIT_CSD(_csd, _func, _info) \ +do { \ + *(_csd) = CSD_INIT((_func), (_info)); \ +} while (0) + /* * Enqueue a llist_node on the call_single_queue; be very careful, read * flush_smp_call_function_queue() in detail. -- cgit v1.2.3 From 2914b0ba61a9d253535e51af16c7122a8148995d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2020 22:28:37 +0200 Subject: irq_work: Optimize irq_work_single() Trade one atomic op for a full memory barrier. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/irqflags.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index fef2d43a7a1d..8de0e1373de7 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -107,14 +107,14 @@ do { \ current->irq_config = 0; \ } while (0) -# define lockdep_irq_work_enter(__work) \ +# define lockdep_irq_work_enter(_flags) \ do { \ - if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ + if (!((_flags) & IRQ_WORK_HARD_IRQ)) \ current->irq_config = 1; \ } while (0) -# define lockdep_irq_work_exit(__work) \ +# define lockdep_irq_work_exit(_flags) \ do { \ - if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\ + if (!((_flags) & IRQ_WORK_HARD_IRQ)) \ current->irq_config = 0; \ } while (0) -- cgit v1.2.3 From 6bef038011a023db41f1b33f0776224729d52344 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 20 Nov 2020 14:42:38 -0700 Subject: rpmsg: Introduce __rpmsg{16|32|64} types Introduce __rpmsg{16|32|64} types along with byte order conversion functions based on an rpmsg_device operation as a foundation to make RPMSG modular and transport agnostic. Tested-by: Guennadi Liakhovetski Suggested-by: Guennadi Liakhovetski Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20201120214245.172963-2-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 51 +++++++++++++++++++++++++++++++ include/linux/rpmsg/byteorder.h | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 include/linux/rpmsg/byteorder.h (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 9fe156d1c018..faf2daff6238 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -17,6 +17,7 @@ #include #include #include +#include #define RPMSG_ADDR_ANY 0xFFFFFFFF @@ -46,6 +47,7 @@ struct rpmsg_channel_info { * @dst: destination address * @ept: the rpmsg endpoint of this channel * @announce: if set, rpmsg will announce the creation/removal of this channel + * @little_endian: True if transport is using little endian byte representation */ struct rpmsg_device { struct device dev; @@ -55,6 +57,7 @@ struct rpmsg_device { u32 dst; struct rpmsg_endpoint *ept; bool announce; + bool little_endian; const struct rpmsg_device_ops *ops; }; @@ -111,6 +114,54 @@ struct rpmsg_driver { int (*callback)(struct rpmsg_device *, void *, int, void *, u32); }; +static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val) +{ + if (!rpdev) + return __rpmsg16_to_cpu(rpmsg_is_little_endian(), val); + else + return __rpmsg16_to_cpu(rpdev->little_endian, val); +} + +static inline __rpmsg16 cpu_to_rpmsg16(struct rpmsg_device *rpdev, u16 val) +{ + if (!rpdev) + return __cpu_to_rpmsg16(rpmsg_is_little_endian(), val); + else + return __cpu_to_rpmsg16(rpdev->little_endian, val); +} + +static inline u32 rpmsg32_to_cpu(struct rpmsg_device *rpdev, __rpmsg32 val) +{ + if (!rpdev) + return __rpmsg32_to_cpu(rpmsg_is_little_endian(), val); + else + return __rpmsg32_to_cpu(rpdev->little_endian, val); +} + +static inline __rpmsg32 cpu_to_rpmsg32(struct rpmsg_device *rpdev, u32 val) +{ + if (!rpdev) + return __cpu_to_rpmsg32(rpmsg_is_little_endian(), val); + else + return __cpu_to_rpmsg32(rpdev->little_endian, val); +} + +static inline u64 rpmsg64_to_cpu(struct rpmsg_device *rpdev, __rpmsg64 val) +{ + if (!rpdev) + return __rpmsg64_to_cpu(rpmsg_is_little_endian(), val); + else + return __rpmsg64_to_cpu(rpdev->little_endian, val); +} + +static inline __rpmsg64 cpu_to_rpmsg64(struct rpmsg_device *rpdev, u64 val) +{ + if (!rpdev) + return __cpu_to_rpmsg64(rpmsg_is_little_endian(), val); + else + return __cpu_to_rpmsg64(rpdev->little_endian, val); +} + #if IS_ENABLED(CONFIG_RPMSG) int register_rpmsg_device(struct rpmsg_device *dev); diff --git a/include/linux/rpmsg/byteorder.h b/include/linux/rpmsg/byteorder.h new file mode 100644 index 000000000000..c0f565dbad6d --- /dev/null +++ b/include/linux/rpmsg/byteorder.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Follows implementation found in linux/virtio_byteorder.h + */ +#ifndef _LINUX_RPMSG_BYTEORDER_H +#define _LINUX_RPMSG_BYTEORDER_H +#include +#include + +static inline bool rpmsg_is_little_endian(void) +{ +#ifdef __LITTLE_ENDIAN + return true; +#else + return false; +#endif +} + +static inline u16 __rpmsg16_to_cpu(bool little_endian, __rpmsg16 val) +{ + if (little_endian) + return le16_to_cpu((__force __le16)val); + else + return be16_to_cpu((__force __be16)val); +} + +static inline __rpmsg16 __cpu_to_rpmsg16(bool little_endian, u16 val) +{ + if (little_endian) + return (__force __rpmsg16)cpu_to_le16(val); + else + return (__force __rpmsg16)cpu_to_be16(val); +} + +static inline u32 __rpmsg32_to_cpu(bool little_endian, __rpmsg32 val) +{ + if (little_endian) + return le32_to_cpu((__force __le32)val); + else + return be32_to_cpu((__force __be32)val); +} + +static inline __rpmsg32 __cpu_to_rpmsg32(bool little_endian, u32 val) +{ + if (little_endian) + return (__force __rpmsg32)cpu_to_le32(val); + else + return (__force __rpmsg32)cpu_to_be32(val); +} + +static inline u64 __rpmsg64_to_cpu(bool little_endian, __rpmsg64 val) +{ + if (little_endian) + return le64_to_cpu((__force __le64)val); + else + return be64_to_cpu((__force __be64)val); +} + +static inline __rpmsg64 __cpu_to_rpmsg64(bool little_endian, u64 val) +{ + if (little_endian) + return (__force __rpmsg64)cpu_to_le64(val); + else + return (__force __rpmsg64)cpu_to_be64(val); +} + +#endif /* _LINUX_RPMSG_BYTEORDER_H */ -- cgit v1.2.3 From c435a04189de372ba9ae72076b18185a884108d6 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 20 Nov 2020 14:42:40 -0700 Subject: rpmsg: Move structure rpmsg_ns_msg to header file Move structure rpmsg_ns_msg to its own header file so that it can be used by other entities. Tested-by: Guennadi Liakhovetski Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20201120214245.172963-4-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg/ns.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/rpmsg/ns.h (limited to 'include/linux') diff --git a/include/linux/rpmsg/ns.h b/include/linux/rpmsg/ns.h new file mode 100644 index 000000000000..73ecc91dc26f --- /dev/null +++ b/include/linux/rpmsg/ns.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_RPMSG_NS_H +#define _LINUX_RPMSG_NS_H + +#include +#include +#include + +/** + * struct rpmsg_ns_msg - dynamic name service announcement message + * @name: name of remote service that is published + * @addr: address of remote service that is published + * @flags: indicates whether service is created or destroyed + * + * This message is sent across to publish a new service, or announce + * about its removal. When we receive these messages, an appropriate + * rpmsg channel (i.e device) is created/destroyed. In turn, the ->probe() + * or ->remove() handler of the appropriate rpmsg driver will be invoked + * (if/as-soon-as one is registered). + */ +struct rpmsg_ns_msg { + char name[RPMSG_NAME_SIZE]; + __rpmsg32 addr; + __rpmsg32 flags; +} __packed; + +/** + * enum rpmsg_ns_flags - dynamic name service announcement flags + * + * @RPMSG_NS_CREATE: a new remote service was just created + * @RPMSG_NS_DESTROY: a known remote service was just destroyed + */ +enum rpmsg_ns_flags { + RPMSG_NS_CREATE = 0, + RPMSG_NS_DESTROY = 1, +}; + +/* Address 53 is reserved for advertising remote services */ +#define RPMSG_NS_ADDR (53) + +#endif -- cgit v1.2.3 From 55488110acc1560b4599e3d509b13f2731a6fee1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 20 Nov 2020 14:42:44 -0700 Subject: rpmsg: Make rpmsg_{register|unregister}_device() public Make function rpmsg_register_device() and rpmsg_unregister_device() functions public so that they can be used by other clients. While doing so get rid of two obsolete function, i.e register_rpmsg_device() and unregister_rpmsg_device(), to prevent confusion. Tested-by: Guennadi Liakhovetski Signed-off-by: Mathieu Poirier Reviewed-by: Arnaud Pouliquen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20201120214245.172963-8-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index faf2daff6238..a5db828b2420 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -164,8 +164,9 @@ static inline __rpmsg64 cpu_to_rpmsg64(struct rpmsg_device *rpdev, u64 val) #if IS_ENABLED(CONFIG_RPMSG) -int register_rpmsg_device(struct rpmsg_device *dev); -void unregister_rpmsg_device(struct rpmsg_device *dev); +int rpmsg_register_device(struct rpmsg_device *rpdev); +int rpmsg_unregister_device(struct device *parent, + struct rpmsg_channel_info *chinfo); int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); void unregister_rpmsg_driver(struct rpmsg_driver *drv); void rpmsg_destroy_ept(struct rpmsg_endpoint *); @@ -188,15 +189,18 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, #else -static inline int register_rpmsg_device(struct rpmsg_device *dev) +static inline int rpmsg_register_device(struct rpmsg_device *rpdev) { return -ENXIO; } -static inline void unregister_rpmsg_device(struct rpmsg_device *dev) +static inline int rpmsg_unregister_device(struct device *parent, + struct rpmsg_channel_info *chinfo) { /* This shouldn't be possible */ WARN_ON(1); + + return -ENXIO; } static inline int __register_rpmsg_driver(struct rpmsg_driver *drv, -- cgit v1.2.3 From 950a7388f02bf775515d13dc508cb9d749bd6d91 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Fri, 20 Nov 2020 14:42:45 -0700 Subject: rpmsg: Turn name service into a stand alone driver Make the RPMSG name service announcement a stand alone driver so that it can be reused by other subsystems. It is also the first step in making the functionatlity transport independent, i.e that is not tied to virtIO. Reviewed-by: Guennadi Liakhovetski Tested-by: Guennadi Liakhovetski Co-developed-by: Mathieu Poirier Signed-off-by: Mathieu Poirier Co-developed-by: Guennadi Liakhovetski Signed-off-by: Guennadi Liakhovetski Signed-off-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/20201120214245.172963-9-mathieu.poirier@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/rpmsg/ns.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg/ns.h b/include/linux/rpmsg/ns.h index 73ecc91dc26f..a7804edd6d58 100644 --- a/include/linux/rpmsg/ns.h +++ b/include/linux/rpmsg/ns.h @@ -4,6 +4,7 @@ #define _LINUX_RPMSG_NS_H #include +#include #include #include @@ -39,4 +40,6 @@ enum rpmsg_ns_flags { /* Address 53 is reserved for advertising remote services */ #define RPMSG_NS_ADDR (53) +int rpmsg_ns_register_device(struct rpmsg_device *rpdev); + #endif -- cgit v1.2.3 From e7bbb7acabf47d74672e0e314bed4d452d2097b4 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 9 Nov 2020 14:24:49 +0530 Subject: dmaengine: add peripheral configuration Some complex dmaengine controllers have capability to program the peripheral device, so pass on the peripheral configuration as part of dma_slave_config Link: https://lore.kernel.org/r/20201109085450.24843-3-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index dd357a747780..493a047ed0a2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -418,6 +418,9 @@ enum dma_slave_buswidth { * @slave_id: Slave requester id. Only valid for slave channels. The dma * slave peripheral will have unique id as dma requester which need to be * pass as slave config. + * @peripheral_config: peripheral configuration for programming peripheral + * for dmaengine transfer + * @peripheral_size: peripheral configuration buffer size * * This struct is passed in as configuration data to a DMA engine * in order to set up a certain channel for DMA transport at runtime. @@ -443,6 +446,8 @@ struct dma_slave_config { u32 dst_port_window_size; bool device_fc; unsigned int slave_id; + void *peripheral_config; + size_t peripheral_size; }; /** -- cgit v1.2.3 From 5d0c3533a19f48e5e7e73806a3e4b29cd4364130 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 9 Nov 2020 14:24:50 +0530 Subject: dmaengine: qcom: Add GPI dma driver This controller provides DMAengine capabilities for a variety of peripheral buses such as I2C, UART, and SPI. By using GPI dmaengine driver, bus drivers can use a standardize interface that is protocol independent to transfer data between memory and peripheral. Link: https://lore.kernel.org/r/20201109085450.24843-4-vkoul@kernel.org Signed-off-by: Vinod Koul --- include/linux/dma/qcom-gpi-dma.h | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 include/linux/dma/qcom-gpi-dma.h (limited to 'include/linux') diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h new file mode 100644 index 000000000000..f46dc3372f11 --- /dev/null +++ b/include/linux/dma/qcom-gpi-dma.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Linaro Limited + */ + +#ifndef QCOM_GPI_DMA_H +#define QCOM_GPI_DMA_H + +/** + * enum spi_transfer_cmd - spi transfer commands + */ +enum spi_transfer_cmd { + SPI_TX = 1, + SPI_RX, + SPI_DUPLEX, +}; + +/** + * struct gpi_spi_config - spi config for peripheral + * + * @loopback_en: spi loopback enable when set + * @clock_pol_high: clock polarity + * @data_pol_high: data polarity + * @pack_en: process tx/rx buffers as packed + * @word_len: spi word length + * @clk_div: source clock divider + * @clk_src: serial clock + * @cmd: spi cmd + * @fragmentation: keep CS assserted at end of sequence + * @cs: chip select toggle + * @set_config: set peripheral config + * @rx_len: receive length for buffer + */ +struct gpi_spi_config { + u8 set_config; + u8 loopback_en; + u8 clock_pol_high; + u8 data_pol_high; + u8 pack_en; + u8 word_len; + u8 fragmentation; + u8 cs; + u32 clk_div; + u32 clk_src; + enum spi_transfer_cmd cmd; + u32 rx_len; +}; + +enum i2c_op { + I2C_WRITE = 1, + I2C_READ, +}; + +/** + * struct gpi_i2c_config - i2c config for peripheral + * + * @pack_enable: process tx/rx buffers as packed + * @cycle_count: clock cycles to be sent + * @high_count: high period of clock + * @low_count: low period of clock + * @clk_div: source clock divider + * @addr: i2c bus address + * @stretch: stretch the clock at eot + * @set_config: set peripheral config + * @rx_len: receive length for buffer + * @op: i2c cmd + * @muli-msg: is part of multi i2c r-w msgs + */ +struct gpi_i2c_config { + u8 set_config; + u8 pack_enable; + u8 cycle_count; + u8 high_count; + u8 low_count; + u8 addr; + u8 stretch; + u16 clk_div; + u32 rx_len; + enum i2c_op op; + bool multi_msg; +}; + +#endif /* QCOM_GPI_DMA_H */ -- cgit v1.2.3 From 36ccdf85829a7dd6936dba5d02fa50138471f0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 23 Nov 2020 18:56:00 +0100 Subject: net, xsk: Avoid taking multiple skbuff references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") addressed the problem that packets were discarded from the Tx AF_XDP ring, when the driver returned NETDEV_TX_BUSY. Part of the fix was bumping the skbuff reference count, so that the buffer would not be freed by dev_direct_xmit(). A reference count larger than one means that the skbuff is "shared", which is not the case. If the "shared" skbuff is sent to the generic XDP receive path, netif_receive_generic_xdp(), and pskb_expand_head() is entered the BUG_ON(skb_shared(skb)) will trigger. This patch adds a variant to dev_direct_xmit(), __dev_direct_xmit(), where a user can select the skbuff free policy. This allows AF_XDP to avoid bumping the reference count, but still keep the NETDEV_TX_BUSY behavior. Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") Reported-by: Yonghong Song Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201123175600.146255-1-bjorn.topel@gmail.com --- include/linux/netdevice.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..76775abf259d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2813,9 +2813,21 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); + int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); -int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); + +static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) +{ + int ret; + + ret = __dev_direct_xmit(skb, queue_id); + if (!dev_xmit_complete(ret)) + kfree_skb(skb); + return ret; +} + int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); -- cgit v1.2.3 From 159e1de201b6fca10bfec50405a3b53a561096a8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Nov 2020 23:56:05 -0800 Subject: fscrypt: add fscrypt_is_nokey_name() It's possible to create a duplicate filename in an encrypted directory by creating a file concurrently with adding the encryption key. Specifically, sys_open(O_CREAT) (or sys_mkdir(), sys_mknod(), or sys_symlink()) can lookup the target filename while the directory's encryption key hasn't been added yet, resulting in a negative no-key dentry. The VFS then calls ->create() (or ->mkdir(), ->mknod(), or ->symlink()) because the dentry is negative. Normally, ->create() would return -ENOKEY due to the directory's key being unavailable. However, if the key was added between the dentry lookup and ->create(), then the filesystem will go ahead and try to create the file. If the target filename happens to already exist as a normal name (not a no-key name), a duplicate filename may be added to the directory. In order to fix this, we need to fix the filesystems to prevent ->create(), ->mkdir(), ->mknod(), and ->symlink() on no-key names. (->rename() and ->link() need it too, but those are already handled correctly by fscrypt_prepare_rename() and fscrypt_prepare_link().) In preparation for this, add a helper function fscrypt_is_nokey_name() that filesystems can use to do this check. Use this helper function for the existing checks that fs/crypto/ does for rename and link. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201118075609.120337-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a8f7a43f031b..8e1d31c959bf 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -111,6 +111,35 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) dentry->d_flags &= ~DCACHE_NOKEY_NAME; } +/** + * fscrypt_is_nokey_name() - test whether a dentry is a no-key name + * @dentry: the dentry to check + * + * This returns true if the dentry is a no-key dentry. A no-key dentry is a + * dentry that was created in an encrypted directory that hasn't had its + * encryption key added yet. Such dentries may be either positive or negative. + * + * When a filesystem is asked to create a new filename in an encrypted directory + * and the new filename's dentry is a no-key dentry, it must fail the operation + * with ENOKEY. This includes ->create(), ->mkdir(), ->mknod(), ->symlink(), + * ->rename(), and ->link(). (However, ->rename() and ->link() are already + * handled by fscrypt_prepare_rename() and fscrypt_prepare_link().) + * + * This is necessary because creating a filename requires the directory's + * encryption key, but just checking for the key on the directory inode during + * the final filesystem operation doesn't guarantee that the key was available + * during the preceding dentry lookup. And the key must have already been + * available during the dentry lookup in order for it to have been checked + * whether the filename already exists in the directory and for the new file's + * dentry not to be invalidated due to it incorrectly having the no-key flag. + * + * Return: %true if the dentry is a no-key name + */ +static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_NOKEY_NAME; +} + /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); @@ -244,6 +273,11 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) { } +static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) +{ + return false; +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { -- cgit v1.2.3 From 234f1b7f8daf112655c87f75ae8932564f871225 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 17 Nov 2020 23:56:09 -0800 Subject: fscrypt: remove unnecessary calls to fscrypt_require_key() In an encrypted directory, a regular dentry (one that doesn't have the no-key name flag) can only be created if the directory's encryption key is available. Therefore the calls to fscrypt_require_key() in __fscrypt_prepare_link() and __fscrypt_prepare_rename() are unnecessary, as these functions already check that the dentries they're given aren't no-key names. Remove these unnecessary calls to fscrypt_require_key(). Link: https://lore.kernel.org/r/20201118075609.120337-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8e1d31c959bf..0c9e64969b73 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -710,8 +710,7 @@ static inline int fscrypt_require_key(struct inode *inode) * * A new link can only be added to an encrypted directory if the directory's * encryption key is available --- since otherwise we'd have no way to encrypt - * the filename. Therefore, we first set up the directory's encryption key (if - * not already done) and return an error if it's unavailable. + * the filename. * * We also verify that the link will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. -- cgit v1.2.3 From 5903f61e035320104394f721f74cd22171636f63 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 23 Nov 2020 10:54:58 -0500 Subject: entry: Fix boot for !CONFIG_GENERIC_ENTRY A copy-pasta mistake tries to set SYSCALL_WORK flags instead of TIF flags for !CONFIG_GENERIC_ENTRY. Also, add safeguards to catch this at compilation time. Fixes: 3136b93c3fb2 ("entry: Expose helpers to migrate TIF to SYSCALL_WORK flags") Reported-by: Naresh Kamboju Suggested-by: Jann Horn Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/87a6v8qd9p.fsf_-_@collabora.com --- include/linux/thread_info.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 317363212ae9..ca80a214df09 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -35,6 +35,7 @@ enum { GOOD_STACK, }; +#ifdef CONFIG_GENERIC_ENTRY enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, @@ -48,6 +49,7 @@ enum syscall_work_bit { #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) +#endif #include @@ -129,11 +131,11 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #else /* CONFIG_GENERIC_ENTRY */ #define set_syscall_work(fl) \ - set_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + set_ti_thread_flag(current_thread_info(), TIF_##fl) #define test_syscall_work(fl) \ - test_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + test_ti_thread_flag(current_thread_info(), TIF_##fl) #define clear_syscall_work(fl) \ - clear_ti_thread_flag(current_thread_info(), SYSCALL_WORK_##fl) + clear_ti_thread_flag(current_thread_info(), TIF_##fl) #define set_task_syscall_work(t, fl) \ set_ti_thread_flag(task_thread_info(t), TIF_##fl) -- cgit v1.2.3 From 2fb94784952e4b290c392b74c2c67b4afa672523 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 24 Nov 2020 09:33:16 +0800 Subject: soundwire: registers: add definitions for clearable interrupt fields DP0 has reserved fields and the read-only SDCA_CASCADE bit. We should not try to write values in these fields, so add a formal definition for clearable interrupts to be used in DP0 interrupt handling. DPN also has reserved fields so add definitions for clearable interrupts as well. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20201124013318.8963-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_registers.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index f420e8059779..0cb1a22685b8 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -41,6 +41,12 @@ #define SDW_DP0_INT_IMPDEF1 BIT(5) #define SDW_DP0_INT_IMPDEF2 BIT(6) #define SDW_DP0_INT_IMPDEF3 BIT(7) +#define SDW_DP0_INTERRUPTS (SDW_DP0_INT_TEST_FAIL | \ + SDW_DP0_INT_PORT_READY | \ + SDW_DP0_INT_BRA_FAILURE | \ + SDW_DP0_INT_IMPDEF1 | \ + SDW_DP0_INT_IMPDEF2 | \ + SDW_DP0_INT_IMPDEF3) #define SDW_DP0_PORTCTRL_DATAMODE GENMASK(3, 2) #define SDW_DP0_PORTCTRL_NXTINVBANK BIT(4) @@ -241,6 +247,11 @@ #define SDW_DPN_INT_IMPDEF1 BIT(5) #define SDW_DPN_INT_IMPDEF2 BIT(6) #define SDW_DPN_INT_IMPDEF3 BIT(7) +#define SDW_DPN_INTERRUPTS (SDW_DPN_INT_TEST_FAIL | \ + SDW_DPN_INT_PORT_READY | \ + SDW_DPN_INT_IMPDEF1 | \ + SDW_DPN_INT_IMPDEF2 | \ + SDW_DPN_INT_IMPDEF3) #define SDW_DPN_PORTCTRL_FLOWMODE GENMASK(1, 0) #define SDW_DPN_PORTCTRL_DATAMODE GENMASK(3, 2) -- cgit v1.2.3 From 2a2b8eaa5b25668a6f717f94b55f4e3aaf87629d Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Tue, 24 Nov 2020 16:20:51 +0800 Subject: iommu: Handle freelists when using deferred flushing in iommu drivers Allow the iommu_unmap_fast to return newly freed page table pages and pass the freelist to queue_iova in the dma-iommu ops path. This is useful for iommu drivers (in this case the intel iommu driver) which need to wait for the ioTLB to be flushed before newly free/unmapped page table pages can be freed. This way we can still batch ioTLB free operations and handle the freelists. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu Tested-by: Logan Gunthorpe Link: https://lore.kernel.org/r/20201124082057.2614359-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b95a6f8db6ff..e56bae327e35 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -180,6 +180,7 @@ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; + struct page *freelist; }; /** -- cgit v1.2.3 From 230309d08b871e439f8618db3610f2cc9b5f7c72 Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Tue, 24 Nov 2020 16:20:52 +0800 Subject: iommu: Add iommu_dma_free_cpu_cached_iovas() Add a iommu_dma_free_cpu_cached_iovas function to allow drivers which use the dma-iommu ops to free cached cpu iovas. Signed-off-by: Tom Murphy Signed-off-by: Lu Baolu Tested-by: Logan Gunthorpe Link: https://lore.kernel.org/r/20201124082057.2614359-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/dma-iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 2112f21f73d8..706b68d1359b 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -37,6 +37,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); +void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain); + #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; @@ -78,5 +81,10 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, + struct iommu_domain *domain) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ -- cgit v1.2.3 From a7656ecf825ac0434a5e7bf108ec1a56b65ee5e4 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 25 Nov 2020 12:30:10 +0530 Subject: iommu/io-pgtable: Add a domain attribute for pagetable configuration Add a new iommu domain attribute DOMAIN_ATTR_IO_PGTABLE_CFG for pagetable configuration which initially will be used to set quirks like for system cache aka last level cache to be used by client drivers like GPU to set right attributes for caching the hardware pagetables into the system cache and later can be extended to include other page table configuration data. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/9190aa16f378fc0a7f8e57b2b9f60b033e7eeb4f.1606287059.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 4 ++++ include/linux/iommu.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 4cde111e425b..215fd9d69540 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -208,6 +208,10 @@ struct io_pgtable { #define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops) +struct io_pgtable_domain_attr { + unsigned long quirks; +}; + static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) { iop->cfg.tlb->tlb_flush_all(iop->cookie); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b95a6f8db6ff..ffaa389ea128 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -118,6 +118,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + DOMAIN_ATTR_IO_PGTABLE_CFG, DOMAIN_ATTR_MAX, }; -- cgit v1.2.3 From e67890c97944b9962cf8c140a7f8077ed643b7d7 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 25 Nov 2020 12:30:11 +0530 Subject: iommu/io-pgtable-arm: Add support to use system cache Add a quirk IO_PGTABLE_QUIRK_ARM_OUTER_WBWA to override the outer-cacheability attributes set in the TCR for a non-coherent page table walker when using system cache. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/f818676b4a2a9ad1edb92721947d47db41ed6a7c.1606287059.git.saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 215fd9d69540..fb4d5a763e0c 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -86,6 +86,9 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table * for use in the upper half of a split address space. + * + * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability + * attributes set in the TCR for a non-coherent page-table walker. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -93,6 +96,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) + #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From 0801a0073f86e020987acbbd96b50f9c85d79de8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:14 +0100 Subject: module: drop version-attribute alignment Commit 98562ad8cb03 ("module: explicitly align module_version_attribute structure") added an alignment attribute to the struct module_version_attribute type in order to fix an alignment issue on m68k where the structure is 2-byte aligned while MODULE_VERSION() forced the __modver section entries to be 4-byte aligned (sizeof(void *)). This was essentially an alternative fix to the problem addressed by b4bc842802db ("module: deal with alignment issues in built-in module versions") which used the array-of-pointer trick to prevent gcc from increasing alignment of the version attribute entries. And with the pointer indirection in place there's no need to increase the alignment of the type. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 7ccdf87f376f..293250958512 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -66,7 +66,7 @@ struct module_version_attribute { struct module_attribute mattr; const char *module_name; const char *version; -} __attribute__ ((__aligned__(sizeof(void *)))); +}; extern ssize_t __modver_version_show(struct module_attribute *, struct module_kobject *, char *); -- cgit v1.2.3 From b112082c8930e7aa72422484b2d31d3aa06f58bc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:15 +0100 Subject: module: simplify version-attribute handling Instead of using the array-of-pointers trick to avoid having gcc mess up the built-in module-version array stride, specify type alignment when declaring entries to prevent gcc from increasing alignment. This is essentially an alternative (one-line) fix to the problem addressed by commit b4bc842802db ("module: deal with alignment issues in built-in module versions"). gcc can increase the alignment of larger objects with static extent as an optimisation, but this can be suppressed by using the aligned attribute when declaring variables. Note that we have been relying on this behaviour for kernel parameters for 16 years and it indeed hasn't changed since the introduction of the aligned attribute in gcc-3.1. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/module.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 293250958512..5958075ea3f4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -266,20 +266,20 @@ extern typeof(name) __mod_##type##__##name##_device_table \ #else #define MODULE_VERSION(_version) \ MODULE_INFO(version, _version); \ - static struct module_version_attribute ___modver_attr = { \ - .mattr = { \ - .attr = { \ - .name = "version", \ - .mode = S_IRUGO, \ + static struct module_version_attribute __modver_attr \ + __used __section("__modver") \ + __aligned(__alignof__(struct module_version_attribute)) \ + = { \ + .mattr = { \ + .attr = { \ + .name = "version", \ + .mode = S_IRUGO, \ + }, \ + .show = __modver_version_show, \ }, \ - .show = __modver_version_show, \ - }, \ - .module_name = KBUILD_MODNAME, \ - .version = _version, \ - }; \ - static const struct module_version_attribute \ - __used __section("__modver") \ - * __moduleparam_const __modver_attr = &___modver_attr + .module_name = KBUILD_MODNAME, \ + .version = _version, \ + }; #endif /* Optional firmware file (or files) needed by the module -- cgit v1.2.3 From 8d6615f1fccc4f39d7d3dcf286b33e8a1e833d2b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:17 +0100 Subject: params: drop redundant "unused" attributes Drop the redundant "unused" attributes from module-parameter structures already marked "used". Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 6388eb9734a5..742074ad9f6e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -22,7 +22,7 @@ #define __MODULE_INFO(tag, name, info) \ static const char __UNIQUE_ID(name)[] \ - __used __section(".modinfo") __attribute__((unused, aligned(1))) \ + __used __section(".modinfo") __attribute__((aligned(1))) \ = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ @@ -289,7 +289,7 @@ struct kparam_array static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ - __section("__param") __attribute__ ((unused, aligned(sizeof(void *)))) \ + __section("__param") __attribute__ ((aligned(sizeof(void *)))) \ = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } -- cgit v1.2.3 From fe2f4fe139b321a38daafc715aeb7d21d9e8e5ad Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:18 +0100 Subject: params: use type alignment for kernel parameters Specify type alignment for kernel parameters instead of sizeof(void *). The alignment attribute is used to prevent gcc from increasing the alignment of objects with static extent as an optimisation, something which would mess up the __param array stride. Using __alignof__(struct kernel_param) rather than sizeof(void *) is preferred since it better indicates why it is there and doesn't break should the type size or alignment change. Note that on m68k the alignment of struct kernel_param is actually two and that adding a 1- or 2-byte field to the 20-byte struct would cause a breakage with the current 4-byte alignment. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 742074ad9f6e..15ecc6cc3a3b 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -288,8 +288,8 @@ struct kparam_array /* Default value instead of permissions? */ \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ - __used \ - __section("__param") __attribute__ ((aligned(sizeof(void *)))) \ + __used __section("__param") \ + __aligned(__alignof__(struct kernel_param)) \ = { __param_str_##name, THIS_MODULE, ops, \ VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } -- cgit v1.2.3 From 2aec389e19150ed3bf67ab708f2435563f76050f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:19 +0100 Subject: params: clean up module-param macros Clean up the module-param macros by adding some indentation and using the __aligned() macro to improve readability. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 15ecc6cc3a3b..eed280fae433 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -21,12 +21,12 @@ #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) #define __MODULE_INFO(tag, name, info) \ -static const char __UNIQUE_ID(name)[] \ - __used __section(".modinfo") __attribute__((aligned(1))) \ - = __MODULE_INFO_PREFIX __stringify(tag) "=" info + static const char __UNIQUE_ID(name)[] \ + __used __section(".modinfo") __aligned(1) \ + = __MODULE_INFO_PREFIX __stringify(tag) "=" info #define __MODULE_PARM_TYPE(name, _type) \ - __MODULE_INFO(parmtype, name##type, #name ":" _type) + __MODULE_INFO(parmtype, name##type, #name ":" _type) /* One for each parameter, describing how to use it. Some files do multiple of these per line, so can't just use MODULE_INFO. */ -- cgit v1.2.3 From 367c820ef08082e68df8a3bc12e62393af21e4b5 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Wed, 7 Oct 2020 14:21:43 +0530 Subject: arm64: Enable perf events based hard lockup detector With the recent feature added to enable perf events to use pseudo NMIs as interrupts on platforms which support GICv3 or later, its now been possible to enable hard lockup detector (or NMI watchdog) on arm64 platforms. So enable corresponding support. One thing to note here is that normally lockup detector is initialized just after the early initcalls but PMU on arm64 comes up much later as device_initcall(). So we need to re-initialize lockup detection once PMU has been initialized. Signed-off-by: Sumit Garg Acked-by: Alexandru Elisei Link: https://lore.kernel.org/r/1602060704-10921-1-git-send-email-sumit.garg@linaro.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 505480217cf1..bf7966776c55 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,6 +163,8 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif +bool arm_pmu_irq_is_nmi(void); + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- cgit v1.2.3 From 63653368c25ff0b1b1aaf045c97ea87bd8c16123 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 25 Nov 2020 14:58:17 +0800 Subject: block: remove unused BIO_SPLIT_ENTRIES Since commit 4b1faf931650 ("block: Kill bio_pair_split()"), there's no user of BIO_SPLIT_ENTRIES anymore. Signed-off-by: Jeffle Xu Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index c6d765382926..ecf67108f091 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -711,12 +711,6 @@ static inline bool bioset_initialized(struct bio_set *bs) return bs->bio_slab != NULL; } -/* - * a small number of entries is fine, not going to be performance critical. - * basically we just need to survive - */ -#define BIO_SPLIT_ENTRIES 2 - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_for_each_vec(bvl, bip, iter) \ -- cgit v1.2.3 From 6527b938426f7fa66051273568d234b1fe01a15b Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 23 Nov 2020 17:38:17 +0200 Subject: net: phy: remove the .did_interrupt() and .ack_interrupt() callback Now that all the PHY drivers have been migrated to directly implement the generic .handle_interrupt() callback for a seamless support of shared IRQs and all the .config_inter() implementations clear any pending interrupts, we can safely remove the two callbacks. With this patch, phylib has a proper support for shared IRQs (and not just for multi-PHY devices. A PHY driver must implement both the .handle_interrupt() and .config_intr() callbacks for the IRQs to be actually used. Signed-off-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8849a00a093f..381a95732b6a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -743,18 +743,11 @@ struct phy_driver { /** @read_status: Determines the negotiated speed and duplex */ int (*read_status)(struct phy_device *phydev); - /** @ack_interrupt: Clears any pending interrupts */ - int (*ack_interrupt)(struct phy_device *phydev); - - /** @config_intr: Enables or disables interrupts */ - int (*config_intr)(struct phy_device *phydev); - - /** - * @did_interrupt: Checks if the PHY generated an interrupt. - * For multi-PHY devices with shared PHY interrupt pin - * Set interrupt bits have to be cleared. + /** @config_intr: Enables or disables interrupts. + * It should also clear any pending interrupts prior to enabling the + * IRQs and after disabling them. */ - int (*did_interrupt)(struct phy_device *phydev); + int (*config_intr)(struct phy_device *phydev); /** @handle_interrupt: Override default interrupt handling */ irqreturn_t (*handle_interrupt)(struct phy_device *phydev); @@ -1487,10 +1480,6 @@ static inline int genphy_config_aneg(struct phy_device *phydev) return __genphy_config_aneg(phydev, false); } -static inline int genphy_no_ack_interrupt(struct phy_device *phydev) -{ - return 0; -} static inline int genphy_no_config_intr(struct phy_device *phydev) { return 0; -- cgit v1.2.3 From 403319be5de51167cd70ddf594b76c95e6d26844 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 24 Nov 2020 15:12:08 +0000 Subject: ima: Implement ima_inode_hash This is in preparation to add a helper for BPF LSM programs to use IMA hashes when attached to LSM hooks. There are LSM hooks like inode_unlink which do not have a struct file * argument and cannot use the existing ima_file_hash API. An inode based API is, therefore, useful in LSM based detections like an executable trying to delete itself which rely on the inode_unlink LSM hook. Moreover, the ima_file_hash function does nothing with the struct file pointer apart from calling file_inode on it and converting it to an inode. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Mimi Zohar Link: https://lore.kernel.org/bpf/20201124151210.1081188-2-kpsingh@chromium.org --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 8fa7bcfb2da2..7233a2751754 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -29,6 +29,7 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); +extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); #ifdef CONFIG_IMA_KEXEC @@ -115,6 +116,11 @@ static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size) return -EOPNOTSUPP; } +static inline int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size) +{ + return -EOPNOTSUPP; +} + static inline void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) {} #endif /* CONFIG_IMA */ -- cgit v1.2.3 From 8b5536ad1216c47fb9b37ef2cd0cfa70d79d4645 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 24 Nov 2020 18:49:28 +0800 Subject: lockdep: Introduce in_softirq lockdep assert The current semantic for napi_consume_skb() is that caller need to provide non-zero budget when calling from NAPI context, and breaking this semantic will cause hard to debug problem, because _kfree_skb_defer() need to run in atomic context in order to push the skb to the particular cpu' napi_alloc_cache atomically. So add the lockdep_assert_in_softirq() to assert when the running context is not in_softirq, in_softirq means softirq is serving or BH is disabled, which has a ambiguous semantics due to the BH disabled confusion, so add a comment to emphasize that. And the softirq context can be interrupted by hard IRQ or NMI context, lockdep_assert_in_softirq() need to assert about hard IRQ or NMI context too. Suggested-by: Jakub Kicinski Signed-off-by: Yunsheng Lin Signed-off-by: Jakub Kicinski --- include/linux/lockdep.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f5594879175a..92771bc1791f 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -594,6 +594,16 @@ do { \ this_cpu_read(hardirqs_enabled))); \ } while (0) +/* + * Acceptable for protecting per-CPU resources accessed from BH. + * Much like in_softirq() - semantics are ambiguous, use carefully. + */ +#define lockdep_assert_in_softirq() \ +do { \ + WARN_ON_ONCE(__lockdep_enabled && \ + (!in_softirq() || in_irq() || in_nmi())); \ +} while (0) + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) @@ -605,6 +615,7 @@ do { \ # define lockdep_assert_preemption_enabled() do { } while (0) # define lockdep_assert_preemption_disabled() do { } while (0) +# define lockdep_assert_in_softirq() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING -- cgit v1.2.3 From 4c1ad562d303526b5d9b49f5e0d72da13ef78dec Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 20 Nov 2020 21:20:42 -0600 Subject: remoteproc: Add a rproc_set_firmware() API A new API, rproc_set_firmware() is added to allow the remoteproc platform drivers and remoteproc client drivers to be able to configure a custom firmware name that is different from the default name used during remoteproc registration. This function is being introduced to provide a kernel-level equivalent of the current sysfs interface to remoteproc client drivers, and can only change firmwares when the remoteproc is offline. This allows some remoteproc drivers to choose different firmwares at runtime based on the functionality the remote processor is providing. The TI PRU Ethernet driver will be an example of such usage as it requires to use different firmwares for different supported protocols. Also, update the firmware_store() function used by the sysfs interface to reuse this function to avoid code duplication. Reviewed-by: Rishabh Bhatnagar Signed-off-by: Suman Anna Link: https://lore.kernel.org/r/20201121032042.6195-1-s-anna@ti.com Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 3fa3ba6498e8..e8ac041c64d9 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -653,6 +653,7 @@ rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, size_t len, int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); +int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, -- cgit v1.2.3 From 7656ca71b0ba04ae7437afe3d046e9134442678e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 25 Nov 2020 15:06:41 +0300 Subject: usb: pd: DFP product types USB Power Delivery Specification R3.0 introduced separate field for the DFP product type to the ID Header VDO. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201125120642.37156-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_vdo.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index 8c5cb5830754..8c08eeb9a74b 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -103,17 +103,25 @@ * -------------------- * <31> :: data capable as a USB host * <30> :: data capable as a USB device - * <29:27> :: product type + * <29:27> :: product type (UFP / Cable) * <26> :: modal operation supported (1b == yes) - * <25:16> :: Reserved, Shall be set to zero + * <25:16> :: product type (DFP) * <15:0> :: USB-IF assigned VID for this cable vendor */ #define IDH_PTYPE_UNDEF 0 #define IDH_PTYPE_HUB 1 #define IDH_PTYPE_PERIPH 2 +#define IDH_PTYPE_PSD 3 +#define IDH_PTYPE_AMA 5 + #define IDH_PTYPE_PCABLE 3 #define IDH_PTYPE_ACABLE 4 -#define IDH_PTYPE_AMA 5 + +#define IDH_PTYPE_DFP_UNDEF 0 +#define IDH_PTYPE_DFP_HUB 1 +#define IDH_PTYPE_DFP_HOST 2 +#define IDH_PTYPE_DFP_PB 3 +#define IDH_PTYPE_DFP_AMC 4 #define VDO_IDH(usbh, usbd, ptype, is_modal, vid) \ ((usbh) << 31 | (usbd) << 30 | ((ptype) & 0x7) << 27 \ @@ -122,6 +130,7 @@ #define PD_IDH_PTYPE(vdo) (((vdo) >> 27) & 0x7) #define PD_IDH_VID(vdo) ((vdo) & 0xffff) #define PD_IDH_MODAL_SUPP(vdo) ((vdo) & (1 << 26)) +#define PD_IDH_DFP_PTYPE(vdo) (((vdo) >> 23) & 0x7) /* * Cert Stat VDO -- cgit v1.2.3 From 640586f8af356096e084d69a9909d217852bde48 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 19 Nov 2020 17:02:21 +0100 Subject: powerpc/ptrace: Simplify gpr_get()/tm_cgpr_get() gpr_get() does membuf_write() twice to override pt_regs->msr in between. We can call membuf_write() once and change ->msr in the kernel buffer, this simplifies the code and the next fix. The patch adds a new simple helper, membuf_at(offs), it returns the new membuf which can be safely used after membuf_write(). Signed-off-by: Oleg Nesterov [mpe: Fixup some minor whitespace issues noticed by Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201119160221.GA5188@redhat.com --- include/linux/regset.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index c3403f328257..a00765f0e8cf 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -46,6 +46,18 @@ static inline int membuf_write(struct membuf *s, const void *v, size_t size) return s->left; } +static inline struct membuf membuf_at(const struct membuf *s, size_t offs) +{ + struct membuf n = *s; + + if (offs > n.left) + offs = n.left; + n.p += offs; + n.left -= offs; + + return n; +} + /* current s->p must be aligned for v; v must be a scalar */ #define membuf_store(s, v) \ ({ \ -- cgit v1.2.3 From 11e5e568ceed7c8c570313a14fa96c72f21dad31 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 24 Nov 2020 17:48:04 -0800 Subject: usb: typec: tcpm: Stay in SNK_TRY_WAIT_DEBOUNCE_CHECK_VBUS till Rp is seen TD.4.7.3. Try SNK DRP Connect Try.SRC DRP fails. The compliance tester mimics being a Try.SRC USB-C port. The failure is due to TCPM exiting SNK_TRY_WAIT_DEBOUNCE_CHECK_VBUS when VBUS is not present eventhough when SNK.Rp is seen. Exit to SRC_TRYWAIT from SNK_TRY_WAIT_DEBOUNCE_CHECK_VBUS only when SNK.Rp is not seen for PD_T_TRY_CC_DEBOUNCE. >From the spec: The port shall then transition to Attached.SNK when the SNK.Rp state is detected on exactly one of the CC1 or CC2 pins for at least tTryCCDebounce and VBUS is detected. Alternatively, the port shall transition to TryWait.SRC if SNK.Rp state is not detected for tTryCCDebounce. Reviewed-by: Guenter Roeck Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20201125014804.1596719-1-badhri@google.com Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 3a805e2ecbc9..63a66dd5d832 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -484,6 +484,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_CC_DEBOUNCE 200 /* 100 - 200 ms */ #define PD_T_PD_DEBOUNCE 20 /* 10 - 20 ms */ +#define PD_T_TRY_CC_DEBOUNCE 15 /* 10 - 20 ms */ #define PD_N_CAPS_COUNT (PD_T_NO_RESPONSE / PD_T_SEND_SOURCE_CAP) #define PD_N_HARD_RESET_COUNT 2 -- cgit v1.2.3 From 07e21d4d96493fd0a8220ab134855253a34a9c84 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Nov 2020 01:22:22 +0800 Subject: soundwire: SDCA: add helper macro to access controls The upcoming SDCA (SoundWire Device Class Audio) specification defines a hierarchical encoding to interface with Class-defined capabilities. The specification is not yet accessible to the general public but this information is released with explicit permission from the MIPI Board to avoid delays with SDCA support on Linux platforms. A block of 64 MBytes of register addresses are allocated to SDCA controls, starting at address 0x40000000. The 26 LSBs which identify individual controls are set based on the following variables: - Function Number. An SCDA device can be split in up to 8 independent Functions. Each of these Functions is described in the SDCA specification, e.g. Smart Amplifier, Smart Microphone, Simple Microphone, Jack codec, HID, etc. - Entity Number. Within each Function, an Entity is an identifiable block. Up to 127 Entities are connected in a pre-defined graph (similar to USB), with Entity0 reserved for Function-level configurations. In contrast to USB, the SDCA spec pre-defines Function Types, topologies, and allowed options, i.e. the degree of freedom is not unlimited to limit the possibility of errors in descriptors leading to software quirks. - Control Selector. Within each Entity, the SDCA specification defines 48 controls such as Mute, Gain, AGC, etc, and 16 implementation defined ones. Some Control Selectors might be used for low-level platform setup, and other exposed to applications and users. Note that the same Control Selector capability, e.g. Latency control, might be located at different offsets in different entities, the Control Selector mapping is Entity-specific. - Control Number. Some Control Selectors allow channel-specific values to be set, with up to 64 channels allowed. This is mostly used for volume control. - Current/Next values. Some Control Selectors are 'Dual-Ranked'. Software may either update the Current value directly for immediate effect. Alternatively, software may write into the 'Next' values and update the SoundWire 1.2 'Commit Groups' register to copy 'Next' values into 'Current' ones in a synchronized manner. This is different from bank switching which is typically used to change the bus configuration only. - MBQ. the Multi-Byte Quantity bit is used to provide atomic updates when accessing more that one byte, for example a 16-bit volume control would be updated consistently, the intermediate values mixing old MSB with new LSB are not applied. These 6 parameters are used to build a 32-bit address to access the desired Controls. Because of address range, paging is required, but the most often used parameter values are placed in the lower 16 bits of the address. This helps to keep the paging registers constant while updating Controls for a specific Device/Function. Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Acked-By: Vinod Koul Link: https://lore.kernel.org/r/20201103172226.4278-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw_registers.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index f420e8059779..e14dff9a9c7f 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -298,4 +298,36 @@ #define SDW_CASC_PORT_MASK_INTSTAT3 1 #define SDW_CASC_PORT_REG_OFFSET_INTSTAT3 2 +/* + * v1.2 device - SDCA address mapping + * + * Spec definition + * Bits Contents + * 31 0 (required by addressing range) + * 30:26 0b10000 (Control Prefix) + * 25 0 (Reserved) + * 24:22 Function Number [2:0] + * 21 Entity[6] + * 20:19 Control Selector[5:4] + * 18 0 (Reserved) + * 17:15 Control Number[5:3] + * 14 Next + * 13 MBQ + * 12:7 Entity[5:0] + * 6:3 Control Selector[3:0] + * 2:0 Control Number[2:0] + */ + +#define SDW_SDCA_CTL(fun, ent, ctl, ch) (BIT(30) | \ + (((fun) & 0x7) << 22) | \ + (((ent) & 0x40) << 15) | \ + (((ent) & 0x3f) << 7) | \ + (((ctl) & 0x30) << 15) | \ + (((ctl) & 0x0f) << 3) | \ + (((ch) & 0x38) << 12) | \ + ((ch) & 0x07)) + +#define SDW_SDCA_MBQ_CTL(reg) ((reg) | BIT(13)) +#define SDW_SDCA_NEXT_CTL(reg) ((reg) | BIT(14)) + #endif /* __SDW_REGISTERS_H */ -- cgit v1.2.3 From fb5103f9d6ce197b4d0b67b4e60e68470f5293d1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 4 Nov 2020 01:22:23 +0800 Subject: regmap/SoundWire: sdw: add support for SoundWire 1.2 MBQ The SoundWire 1.1 specification only allowed for reads and writes of bytes. The SoundWire 1.2 specification adds a new capability to transfer "Multi-Byte Quantities" (MBQ) across the bus. The transfers still happens one-byte-at-a-time, but the update is atomic. For example when writing a 16-bit volume, the first byte transferred is only taken into account when the second byte is successfully transferred. The mechanism is symmetrical for read and writes: - On a read, the address of the last byte to be read is modified by setting the MBQ bit - On a write, the address of all but the last byte to be written are modified by setting the MBQ bit. The address for the last byte relies on the MBQ bit being cleared. The current definitions for MBQ-based controls in the SDCA draft standard are limited to 16 bits for volumes, so for now this is the only supported format. An update will be provided if and when support for 24-bit and 32-bit values is specified by the SDCA standard. One possible objection is that this code could have been handled with regmap-sdw.c. However this is a new spec addition not handled by every SoundWire 1.1 and non-SDCA device, so there's no reason to load code that will never be used. Also in practice it's extremely unlikely that CONFIG_REGMAP would not be selected with CONFIG_REGMAP_MBQ selected. However there's no functional dependency between the two modules so they can be selected separately. Reviewed-by: Rander Wang Reviewed-by: Guennadi Liakhovetski Reviewed-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201103172226.4278-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e7834d98207f..a652d1474d6a 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -570,6 +570,10 @@ struct regmap *__regmap_init_sdw(struct sdw_slave *sdw, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_sdw_mbq(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__regmap_init_spi_avmm(struct spi_device *spi, const struct regmap_config *config, struct lock_class_key *lock_key, @@ -619,6 +623,10 @@ struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, const struct regmap_config *config, struct lock_class_key *lock_key, @@ -817,6 +825,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__regmap_init_sdw, #config, \ sdw, config) +/** + * regmap_init_sdw_mbq() - Initialise register map + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_sdw_mbq(sdw, config) \ + __regmap_lockdep_wrapper(__regmap_init_sdw_mbq, #config, \ + sdw, config) + /** * regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave * to AVMM Bus Bridge @@ -989,6 +1010,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_sdw, #config, \ sdw, config) +/** + * devm_regmap_init_sdw_mbq() - Initialise managed register map + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sdw_mbq(sdw, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sdw_mbq, #config, \ + sdw, config) + /** * devm_regmap_init_slimbus() - Initialise managed register map * -- cgit v1.2.3 From 4df910620bebb5cfe234af16ac8f6474b60215fd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 25 Nov 2020 13:22:21 +0800 Subject: mm: memcg: relayout structure mem_cgroup to avoid cache interference 0day reported one -22.7% regression for will-it-scale page_fault2 case [1] on a 4 sockets 144 CPU platform, and bisected to it to be caused by Waiman's optimization (commit bd0b230fe1) of saving one 'struct page_counter' space for 'struct mem_cgroup'. Initially we thought it was due to the cache alignment change introduced by the patch, but further debug shows that it is due to some hot data members ('vmstats_local', 'vmstats_percpu', 'vmstats') sit in 2 adjacent cacheline (2N and 2N+1 cacheline), and when adjacent cache line prefetch is enabled, it triggers an "extended level" of cache false sharing for 2 adjacent cache lines. So exchange the 2 member blocks, while keeping mostly the original cache alignment, which can restore and even enhance the performance, and save 64 bytes of space for 'struct mem_cgroup' (from 2880 to 2816, with 0day's default RHEL-8.3 kernel config) [1]. https://lore.kernel.org/lkml/20201102091543.GM31092@shao2-debian/ Fixes: bd0b230fe145 ("mm/memcg: unify swap and memsw page counters") Reported-by: kernel test robot Signed-off-by: Feng Tang Acked-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a80c59af2c60..922a7f600465 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -282,20 +282,6 @@ struct mem_cgroup { MEMCG_PADDING(_pad1_); - /* - * set > 0 if pages under this cgroup are moving to other cgroup. - */ - atomic_t moving_account; - struct task_struct *move_lock_task; - - /* Legacy local VM stats and events */ - struct memcg_vmstats_percpu __percpu *vmstats_local; - - /* Subtree VM stats and events (batched updates) */ - struct memcg_vmstats_percpu __percpu *vmstats_percpu; - - MEMCG_PADDING(_pad2_); - atomic_long_t vmstats[MEMCG_NR_STAT]; atomic_long_t vmevents[NR_VM_EVENT_ITEMS]; @@ -317,6 +303,20 @@ struct mem_cgroup { struct list_head objcg_list; /* list of inherited objcgs */ #endif + MEMCG_PADDING(_pad2_); + + /* + * set > 0 if pages under this cgroup are moving to other cgroup. + */ + atomic_t moving_account; + struct task_struct *move_lock_task; + + /* Legacy local VM stats and events */ + struct memcg_vmstats_percpu __percpu *vmstats_local; + + /* Subtree VM stats and events (batched updates) */ + struct memcg_vmstats_percpu __percpu *vmstats_percpu; + #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; struct wb_domain cgwb_domain; -- cgit v1.2.3 From 2a2970891647fee7e7ea767425f895140faffaa8 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Fri, 20 Nov 2020 15:03:24 -0800 Subject: net/mlx5: Add sample offload hardware bits and structures Hardware introduces flow sampler object for packet sampling. Add the offload hardware bits and structures. Signed-off-by: Chris Mi Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 651591a2965d..65ea35af0527 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10657,11 +10657,13 @@ struct mlx5_ifc_affiliated_event_header_bits { enum { MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT(0xc), MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT(0x13), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT(0x20), }; enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, + MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, }; enum { @@ -10736,6 +10738,33 @@ struct mlx5_ifc_create_encryption_key_in_bits { struct mlx5_ifc_encryption_key_obj_bits encryption_key_object; }; +struct mlx5_ifc_sampler_obj_bits { + u8 modify_field_select[0x40]; + + u8 table_type[0x8]; + u8 level[0x8]; + u8 reserved_at_50[0xf]; + u8 ignore_flow_level[0x1]; + + u8 sample_ratio[0x20]; + + u8 reserved_at_80[0x8]; + u8 sample_table_id[0x18]; + + u8 reserved_at_a0[0x8]; + u8 default_table_id[0x18]; + + u8 sw_steering_icm_address_rx[0x40]; + u8 sw_steering_icm_address_tx[0x40]; + + u8 reserved_at_140[0xa0]; +}; + +struct mlx5_ifc_create_sampler_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_sampler_obj_bits sampler_object; +}; + enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128 = 0x0, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256 = 0x1, -- cgit v1.2.3 From 38730630880c6f47ad73dd90524ff52443b8bc48 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Fri, 20 Nov 2020 15:03:25 -0800 Subject: net/mlx5: Add sampler destination type The flow sampler object is a new destination type. Add a new member for the flow destination. Signed-off-by: Chris Mi Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 846d94ad04bc..35d2cc1646d3 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -132,6 +132,7 @@ struct mlx5_flow_destination { struct mlx5_pkt_reformat *pkt_reformat; u8 flags; } vport; + u32 sampler_id; }; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 65ea35af0527..2f2add4bd5e1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1616,6 +1616,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, + MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, -- cgit v1.2.3 From 7da3ad6c26f41f403fe6823c3de242551db09c37 Mon Sep 17 00:00:00 2001 From: Muhammad Sammar Date: Fri, 20 Nov 2020 15:03:27 -0800 Subject: net/mlx5: Add misc4 to mlx5_ifc_fte_match_param_bits Add misc4 match params to enable matching on prog_sample_fields. Signed-off-by: Muhammad Sammar Reviewed-by: Alex Vesker Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index cf824366a7d1..e9639c4cf2ed 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1076,6 +1076,7 @@ enum { MLX5_MATCH_INNER_HEADERS = 1 << 2, MLX5_MATCH_MISC_PARAMETERS_2 = 1 << 3, MLX5_MATCH_MISC_PARAMETERS_3 = 1 << 4, + MLX5_MATCH_MISC_PARAMETERS_4 = 1 << 5, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2f2add4bd5e1..11c24fafd7f2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -623,6 +623,26 @@ struct mlx5_ifc_fte_match_set_misc3_bits { u8 reserved_at_140[0xc0]; }; +struct mlx5_ifc_fte_match_set_misc4_bits { + u8 prog_sample_field_value_0[0x20]; + + u8 prog_sample_field_id_0[0x20]; + + u8 prog_sample_field_value_1[0x20]; + + u8 prog_sample_field_id_1[0x20]; + + u8 prog_sample_field_value_2[0x20]; + + u8 prog_sample_field_id_2[0x20]; + + u8 prog_sample_field_value_3[0x20]; + + u8 prog_sample_field_id_3[0x20]; + + u8 reserved_at_100[0x100]; +}; + struct mlx5_ifc_cmd_pas_bits { u8 pa_h[0x20]; @@ -1669,7 +1689,9 @@ struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_misc3_bits misc_parameters_3; - u8 reserved_at_a00[0x600]; + struct mlx5_ifc_fte_match_set_misc4_bits misc_parameters_4; + + u8 reserved_at_c00[0x400]; }; enum { @@ -5462,6 +5484,7 @@ enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3, MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_3 = 0x4, + MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_4 = 0x5, }; struct mlx5_ifc_query_flow_group_out_bits { -- cgit v1.2.3 From 59d2ae1db89f5a491d48f798ffccef36cc69ca65 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 20 Nov 2020 15:03:28 -0800 Subject: net/mlx5: Add ts_cqe_to_dest_cqn related bits Add a bit in HCA capabilities layout to indicate if ts_cqe_to_dest_cqn is supported. In addition, add ts_cqe_to_dest_cqn field to SQ context, for driver to set the actual CQN. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 11c24fafd7f2..632b9a61fda5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1261,7 +1261,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ece_support[0x1]; u8 reserved_at_a4[0x7]; u8 log_max_srq[0x5]; - u8 reserved_at_b0[0x10]; + u8 reserved_at_b0[0x2]; + u8 ts_cqe_to_dest_cqn[0x1]; + u8 reserved_at_b3[0xd]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -3312,8 +3314,12 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_80[0x10]; u8 hairpin_peer_vhca[0x10]; - u8 reserved_at_a0[0x50]; + u8 reserved_at_a0[0x20]; + u8 reserved_at_c0[0x8]; + u8 ts_cqe_to_dest_cqn[0x18]; + + u8 reserved_at_e0[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; u8 reserved_at_110[0x10]; -- cgit v1.2.3 From e5dfe6b57e8eada1c238dd2c7020345b0d8f6454 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:29 -0800 Subject: net/mlx5: Avoid exposing driver internal command helpers mlx5 command init and cleanup routines are internal to mlx5_core driver. Hence, avoid exporting them and move their definition to mlx5_core driver's internal file mlx5_core.h Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index add85094f9a5..5e84b1d53650 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -888,10 +888,6 @@ enum { CMD_ALLOWED_OPCODE_ALL, }; -int mlx5_cmd_init(struct mlx5_core_dev *dev); -void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); -void mlx5_cmd_set_state(struct mlx5_core_dev *dev, - enum mlx5_cmdif_state cmdif_state); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); -- cgit v1.2.3 From 349125ba232ea53d71a57c65c81f109c323cc369 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:31 -0800 Subject: net/mlx5: Update the hardware interface definition for vhca state Update the hardware interface definitions to query and modify vhca state, related EQE and event code. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 7 +++++++ include/linux/mlx5/mlx5_ifc.h | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e9639c4cf2ed..f1de49d64a98 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -346,6 +346,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe, + MLX5_EVENT_TYPE_VHCA_STATE_CHANGE = 0xf, MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_DCT_KEY_VIOLATION = 0x1d, @@ -717,6 +718,11 @@ struct mlx5_eqe_sync_fw_update { u8 sync_rst_state; }; +struct mlx5_eqe_vhca_state { + __be16 ec_function; + __be16 function_id; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -736,6 +742,7 @@ union ev_data { struct mlx5_eqe_temp_warning temp_warning; struct mlx5_eqe_xrq_err xrq_err; struct mlx5_eqe_sync_fw_update sync_fw_update; + struct mlx5_eqe_vhca_state vhca_state; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 632b9a61fda5..3ace1976514c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -299,6 +299,8 @@ enum { MLX5_CMD_OP_CREATE_UMEM = 0xa08, MLX5_CMD_OP_DESTROY_UMEM = 0xa0a, MLX5_CMD_OP_SYNC_STEERING = 0xb00, + MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, + MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_MAX }; @@ -1244,7 +1246,15 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x30]; + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x3]; + u8 event_on_vhca_state_teardown_request[0x1]; + u8 event_on_vhca_state_in_use[0x1]; + u8 event_on_vhca_state_active[0x1]; + u8 event_on_vhca_state_allocated[0x1]; + u8 event_on_vhca_state_invalid[0x1]; + u8 reserved_at_28[0x8]; u8 vhca_id[0x10]; u8 reserved_at_40[0x40]; @@ -1534,7 +1544,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 disable_local_lb_uc[0x1]; u8 disable_local_lb_mc[0x1]; u8 log_min_hairpin_wq_data_sz[0x5]; - u8 reserved_at_3e8[0x3]; + u8 reserved_at_3e8[0x2]; + u8 vhca_state[0x1]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; @@ -1602,7 +1613,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_num_of_monitor_counters[0x10]; u8 num_ppcnt_monitor_counters[0x10]; - u8 reserved_at_640[0x10]; + u8 max_num_sf[0x10]; u8 num_q_monitor_counters[0x10]; u8 reserved_at_660[0x20]; -- cgit v1.2.3 From 21adf05d4584c99a07a604224b9cfeddcc6bc47c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Fri, 20 Nov 2020 15:03:32 -0800 Subject: net/mlx5: Expose IP-in-IP TX and RX capability bits Expose FW indication that it supports stateless offloads for IP over IP tunneled packets per direction. In some HW like ConnectX-4 IP-in-IP support is not symmetric, it supports steering on the inner header but it doesn't TX-Checksum and TSO. Add IP-in-IP capability per direction to cover this case as well. Note: only if both indications are turned on, the global tunnel_stateless_ip_over_ip is on too. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3ace1976514c..96888f9f822d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -913,7 +913,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_stateless_ipv4_over_vxlan[0x1]; u8 tunnel_stateless_ip_over_ip[0x1]; u8 insert_trailer[0x1]; - u8 reserved_at_2b[0x5]; + u8 reserved_at_2b[0x1]; + u8 tunnel_stateless_ip_over_ip_rx[0x1]; + u8 tunnel_stateless_ip_over_ip_tx[0x1]; + u8 reserved_at_2e[0x2]; u8 max_vxlan_udp_ports[0x8]; u8 reserved_at_38[0x6]; u8 max_geneve_opt_len[0x1]; -- cgit v1.2.3 From 959af5569f57d189b5c4fccae45f16d5ff01aa39 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Fri, 20 Nov 2020 15:03:33 -0800 Subject: net/mlx5: Expose other function ifc bits Expose other function ifc bits to enable setting HCA caps on behalf of other function. In addition, expose vhca_resource_manager bit to control whether the other function functionality is supported by firmware. Signed-off-by: Yishai Hadas Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 96888f9f822d..3e337386faa8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1249,7 +1249,8 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x20]; + u8 reserved_at_0[0x1f]; + u8 vhca_resource_manager[0x1]; u8 reserved_at_20[0x3]; u8 event_on_vhca_state_teardown_request[0x1]; @@ -4247,7 +4248,11 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; union mlx5_ifc_hca_cap_union_bits capability; }; -- cgit v1.2.3 From 3b1e58aa832ed537289be6a51a2015309688a90c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:36 -0800 Subject: net/mlx5: Make API mlx5_core_is_ecpf accept const pointer Subsequent patch implements helper API which has mlx5_core_dev as const pointer, make its caller API too const *. Signed-off-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5e84b1d53650..d6ef3068d7d3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1133,7 +1133,7 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_VF; } -static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) +static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; } -- cgit v1.2.3 From 8a90f2fc67826a3876df1cb72e42d4a9a135f4fa Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:37 -0800 Subject: net/mlx5: Rename peer_pf to host_pf To match the hardware spec, rename peer_pf to host_pf. Signed-off-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d6ef3068d7d3..8e9bcb3bfd77 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -547,7 +547,7 @@ struct mlx5_priv { atomic_t reg_pages; struct list_head free_list; int vfs_pages; - int peer_pf_pages; + int host_pf_pages; struct mlx5_core_health health; -- cgit v1.2.3 From 617b860c1875842d9cc3338d7dabd2b3538038f1 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 20 Nov 2020 15:03:39 -0800 Subject: net/mlx5: Treat host PF vport as other (non eswitch manager) vport When eswitch manager is running on ECPF, host PF should be treated as non eswitch manager port, similar to other VF vports. Fail to do so, results in firmware treating PF's vport as ECPF vport for eswitch ACL tables. Non zero check to figure out if a given vport is other vport or not is not sufficient becase PF vport number = 0 on ECPF. Hence, create esw acl tables with an attribute of other vport. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 35d2cc1646d3..1f51f4c3b1af 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -50,6 +50,7 @@ enum { MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), + MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), }; #define LEFTOVERS_RULE_NUM 2 @@ -174,9 +175,7 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - u32 level, u16 vport); + struct mlx5_flow_table_attr *ft_attr, u16 vport); struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( struct mlx5_flow_namespace *ns, int prio, u32 level); -- cgit v1.2.3 From 8d8d53cf8fd028310b1189165b939cde124895d7 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Oct 2020 12:52:40 +1100 Subject: dma-mapping: Allow mixing bypass and mapped DMA operation At the moment we allow bypassing DMA ops only when we can do this for the entire RAM. However there are configs with mixed type memory where we could still allow bypassing IOMMU in most cases; POWERPC with persistent memory is one example. This adds an arch hook to determine where bypass can still work and we invoke direct DMA API. The following patch checks the bus limit on POWERPC to allow or disallow direct mapping. This adds a ARCH_HAS_DMA_MAP_DIRECT config option to make the arch_xxxx hooks no-op by default. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index a5f89fc4d6df..38c8a4558e08 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -314,6 +314,20 @@ static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); +#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT +bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr); +bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle); +bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, + int nents); +#else +#define arch_dma_map_page_direct(d, a) (false) +#define arch_dma_unmap_page_direct(d, a) (false) +#define arch_dma_map_sg_direct(d, s, n) (false) +#define arch_dma_unmap_sg_direct(d, s, n) (false) +#endif + #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); -- cgit v1.2.3 From c7a5899eb26e2a4d516d53f65b6dd67be2228041 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Tue, 17 Nov 2020 17:47:23 +0100 Subject: xfrm: redact SA secret with lockdown confidentiality redact XFRM SA secret in the netlink response to xfrm_get_sa() or dumpall sa. Enable lockdown, confidentiality mode, at boot or at run time. e.g. when enabled: cat /sys/kernel/security/lockdown none integrity [confidentiality] ip xfrm state src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x00000002 reqid 2 mode tunnel replay-window 0 aead rfc4106(gcm(aes)) 0x0000000000000000000000000000000000000000 96 note: the aead secret is redacted. Redacting secret is also a FIPS 140-2 requirement. v1->v2 - add size checks before memset calls v2->v3 - replace spaces with tabs for consistency v3->v4 - use kernel lockdown instead of a /proc setting v4->v5 - remove kconfig option Reviewed-by: Stephan Mueller Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index bc2725491560..1112a79a7dba 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -127,6 +127,7 @@ enum lockdown_reason { LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, + LOCKDOWN_XFRM_SECRET, LOCKDOWN_CONFIDENTIALITY_MAX, }; -- cgit v1.2.3 From 928296ea5da37838d7127de4b10f47cd97401b13 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 30 Oct 2020 12:36:11 +0100 Subject: soc: mediatek: pm_domains: Make bus protection generic Bus protection is not exclusively done by calling the infracfg misc driver. Make the calls for setting and clearing the bus protection generic so that we can use other blocks for it as well. Signed-off-by: Matthias Brugger Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201030113622.201188-6-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 233463d789c6..5bcaab767f6a 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -32,6 +32,11 @@ #define MT7622_TOP_AXI_PROT_EN_WB (BIT(2) | BIT(6) | \ BIT(7) | BIT(8)) +#define INFRA_TOPAXI_PROTECTEN 0x0220 +#define INFRA_TOPAXI_PROTECTSTA1 0x0228 +#define INFRA_TOPAXI_PROTECTEN_SET 0x0260 +#define INFRA_TOPAXI_PROTECTEN_CLR 0x0264 + #define REG_INFRA_MISC 0xf00 #define F_DDR_4GB_SUPPORT_EN BIT(13) -- cgit v1.2.3 From eb9fa767fbe19d3db7d303e9fde7f3056221ffe1 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 30 Oct 2020 12:36:17 +0100 Subject: soc: mediatek: pm-domains: Add support for mt8183 Add the needed board data to support mt8183 SoC. Signed-off-by: Matthias Brugger Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20201030113622.201188-12-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 5bcaab767f6a..9d01e32e19bc 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,52 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8183_TOP_AXI_PROT_EN_STA1 0x228 +#define MT8183_TOP_AXI_PROT_EN_STA1_1 0x258 +#define MT8183_TOP_AXI_PROT_EN_SET 0x2a0 +#define MT8183_TOP_AXI_PROT_EN_CLR 0x2a4 +#define MT8183_TOP_AXI_PROT_EN_1_SET 0x2a8 +#define MT8183_TOP_AXI_PROT_EN_1_CLR 0x2ac +#define MT8183_TOP_AXI_PROT_EN_MCU_SET 0x2c4 +#define MT8183_TOP_AXI_PROT_EN_MCU_CLR 0x2c8 +#define MT8183_TOP_AXI_PROT_EN_MCU_STA1 0x2e4 +#define MT8183_TOP_AXI_PROT_EN_MM_SET 0x2d4 +#define MT8183_TOP_AXI_PROT_EN_MM_CLR 0x2d8 +#define MT8183_TOP_AXI_PROT_EN_MM_STA1 0x2ec + +#define MT8183_TOP_AXI_PROT_EN_DISP (BIT(10) | BIT(11)) +#define MT8183_TOP_AXI_PROT_EN_CONN (BIT(13) | BIT(14)) +#define MT8183_TOP_AXI_PROT_EN_MFG (BIT(21) | BIT(22)) +#define MT8183_TOP_AXI_PROT_EN_CAM BIT(28) +#define MT8183_TOP_AXI_PROT_EN_VPU_TOP BIT(27) +#define MT8183_TOP_AXI_PROT_EN_1_DISP (BIT(16) | BIT(17)) +#define MT8183_TOP_AXI_PROT_EN_1_MFG GENMASK(21, 19) +#define MT8183_TOP_AXI_PROT_EN_MM_ISP (BIT(3) | BIT(8)) +#define MT8183_TOP_AXI_PROT_EN_MM_ISP_2ND BIT(10) +#define MT8183_TOP_AXI_PROT_EN_MM_CAM (BIT(4) | BIT(5) | \ + BIT(9) | BIT(13)) +#define MT8183_TOP_AXI_PROT_EN_MM_VPU_TOP (GENMASK(9, 6) | \ + BIT(12)) +#define MT8183_TOP_AXI_PROT_EN_MM_VPU_TOP_2ND (BIT(10) | BIT(11)) +#define MT8183_TOP_AXI_PROT_EN_MM_CAM_2ND BIT(11) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE0_2ND (BIT(0) | BIT(2) | \ + BIT(4)) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE1_2ND (BIT(1) | BIT(3) | \ + BIT(5)) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE0 BIT(6) +#define MT8183_TOP_AXI_PROT_EN_MCU_VPU_CORE1 BIT(7) + +#define MT8183_SMI_COMMON_CLAMP_EN 0x3c0 +#define MT8183_SMI_COMMON_CLAMP_EN_SET 0x3c4 +#define MT8183_SMI_COMMON_CLAMP_EN_CLR 0x3c8 + +#define MT8183_SMI_COMMON_SMI_CLAMP_DISP GENMASK(7, 0) +#define MT8183_SMI_COMMON_SMI_CLAMP_VENC BIT(1) +#define MT8183_SMI_COMMON_SMI_CLAMP_ISP BIT(2) +#define MT8183_SMI_COMMON_SMI_CLAMP_CAM (BIT(3) | BIT(4)) +#define MT8183_SMI_COMMON_SMI_CLAMP_VPU_TOP (BIT(5) | BIT(6)) +#define MT8183_SMI_COMMON_SMI_CLAMP_VDEC BIT(7) + #define MT8173_TOP_AXI_PROT_EN_MCI_M2 BIT(0) #define MT8173_TOP_AXI_PROT_EN_MM_M0 BIT(1) #define MT8173_TOP_AXI_PROT_EN_MM_M1 BIT(2) -- cgit v1.2.3 From a49d5e7a89d644a5c0ddc851be4bbf08614e6015 Mon Sep 17 00:00:00 2001 From: Weiyi Lu Date: Fri, 30 Oct 2020 12:36:22 +0100 Subject: soc: mediatek: pm-domains: Add support for mt8192 Add the needed board data to support mt8192 SoC. Signed-off-by: Weiyi Lu Signed-off-by: Enric Balletbo i Serra Tested-by: Weiyi Lu Link: https://lore.kernel.org/r/20201030113622.201188-17-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 56 +++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 9d01e32e19bc..e7842debc05d 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -2,6 +2,62 @@ #ifndef __SOC_MEDIATEK_INFRACFG_H #define __SOC_MEDIATEK_INFRACFG_H +#define MT8192_TOP_AXI_PROT_EN_STA1 0x228 +#define MT8192_TOP_AXI_PROT_EN_1_STA1 0x258 +#define MT8192_TOP_AXI_PROT_EN_SET 0x2a0 +#define MT8192_TOP_AXI_PROT_EN_CLR 0x2a4 +#define MT8192_TOP_AXI_PROT_EN_1_SET 0x2a8 +#define MT8192_TOP_AXI_PROT_EN_1_CLR 0x2ac +#define MT8192_TOP_AXI_PROT_EN_MM_SET 0x2d4 +#define MT8192_TOP_AXI_PROT_EN_MM_CLR 0x2d8 +#define MT8192_TOP_AXI_PROT_EN_MM_STA1 0x2ec +#define MT8192_TOP_AXI_PROT_EN_2_SET 0x714 +#define MT8192_TOP_AXI_PROT_EN_2_CLR 0x718 +#define MT8192_TOP_AXI_PROT_EN_2_STA1 0x724 +#define MT8192_TOP_AXI_PROT_EN_VDNR_SET 0xb84 +#define MT8192_TOP_AXI_PROT_EN_VDNR_CLR 0xb88 +#define MT8192_TOP_AXI_PROT_EN_VDNR_STA1 0xb90 +#define MT8192_TOP_AXI_PROT_EN_MM_2_SET 0xdcc +#define MT8192_TOP_AXI_PROT_EN_MM_2_CLR 0xdd0 +#define MT8192_TOP_AXI_PROT_EN_MM_2_STA1 0xdd8 + +#define MT8192_TOP_AXI_PROT_EN_DISP (BIT(6) | BIT(23)) +#define MT8192_TOP_AXI_PROT_EN_CONN (BIT(13) | BIT(18)) +#define MT8192_TOP_AXI_PROT_EN_CONN_2ND BIT(14) +#define MT8192_TOP_AXI_PROT_EN_MFG1 GENMASK(22, 21) +#define MT8192_TOP_AXI_PROT_EN_1_CONN BIT(10) +#define MT8192_TOP_AXI_PROT_EN_1_MFG1 BIT(21) +#define MT8192_TOP_AXI_PROT_EN_1_CAM BIT(22) +#define MT8192_TOP_AXI_PROT_EN_2_CAM BIT(0) +#define MT8192_TOP_AXI_PROT_EN_2_ADSP BIT(3) +#define MT8192_TOP_AXI_PROT_EN_2_AUDIO BIT(4) +#define MT8192_TOP_AXI_PROT_EN_2_MFG1 GENMASK(6, 5) +#define MT8192_TOP_AXI_PROT_EN_2_MFG1_2ND BIT(7) +#define MT8192_TOP_AXI_PROT_EN_MM_CAM (BIT(0) | BIT(2)) +#define MT8192_TOP_AXI_PROT_EN_MM_DISP (BIT(0) | BIT(2) | \ + BIT(10) | BIT(12) | \ + BIT(14) | BIT(16) | \ + BIT(24) | BIT(26)) +#define MT8192_TOP_AXI_PROT_EN_MM_CAM_2ND (BIT(1) | BIT(3)) +#define MT8192_TOP_AXI_PROT_EN_MM_DISP_2ND (BIT(1) | BIT(3) | \ + BIT(15) | BIT(17) | \ + BIT(25) | BIT(27)) +#define MT8192_TOP_AXI_PROT_EN_MM_ISP2 BIT(14) +#define MT8192_TOP_AXI_PROT_EN_MM_ISP2_2ND BIT(15) +#define MT8192_TOP_AXI_PROT_EN_MM_IPE BIT(16) +#define MT8192_TOP_AXI_PROT_EN_MM_IPE_2ND BIT(17) +#define MT8192_TOP_AXI_PROT_EN_MM_VDEC BIT(24) +#define MT8192_TOP_AXI_PROT_EN_MM_VDEC_2ND BIT(25) +#define MT8192_TOP_AXI_PROT_EN_MM_VENC BIT(26) +#define MT8192_TOP_AXI_PROT_EN_MM_VENC_2ND BIT(27) +#define MT8192_TOP_AXI_PROT_EN_MM_2_ISP BIT(8) +#define MT8192_TOP_AXI_PROT_EN_MM_2_DISP (BIT(8) | BIT(12)) +#define MT8192_TOP_AXI_PROT_EN_MM_2_ISP_2ND BIT(9) +#define MT8192_TOP_AXI_PROT_EN_MM_2_DISP_2ND (BIT(9) | BIT(13)) +#define MT8192_TOP_AXI_PROT_EN_MM_2_MDP BIT(12) +#define MT8192_TOP_AXI_PROT_EN_MM_2_MDP_2ND BIT(13) +#define MT8192_TOP_AXI_PROT_EN_VDNR_CAM BIT(21) + #define MT8183_TOP_AXI_PROT_EN_STA1 0x228 #define MT8183_TOP_AXI_PROT_EN_STA1_1 0x258 #define MT8183_TOP_AXI_PROT_EN_SET 0x2a0 -- cgit v1.2.3 From 431ec7bd4d52caa2fc20fbe87744f522e3d1efad Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Fri, 27 Nov 2020 13:52:01 +0100 Subject: mfd: si476x-core.h: Fix "regulator" spelling in comment "regualtor" -> "regulator" Signed-off-by: Michael Klein Signed-off-by: Lee Jones --- include/linux/mfd/si476x-core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h index 4708c2b8512a..dd95c37ca134 100644 --- a/include/linux/mfd/si476x-core.h +++ b/include/linux/mfd/si476x-core.h @@ -57,7 +57,7 @@ enum si476x_mfd_cells { * @SI476X_POWER_DOWN: In this state all regulators are turned off * and the reset line is pulled low. The device is completely * inactive. - * @SI476X_POWER_UP_FULL: In this state all the power regualtors are + * @SI476X_POWER_UP_FULL: In this state all the power regulators are * turned on, reset line pulled high, IRQ line is enabled(polling is * active for polling use scenario) and device is turned on with * POWER_UP command. The device is ready to be used. -- cgit v1.2.3 From 6a0eaf5123e0e1223252b88cd5775f74105e27bd Mon Sep 17 00:00:00 2001 From: Dean Camera Date: Thu, 26 Nov 2020 09:39:34 +1100 Subject: HID: Increase HID maximum report size to 16KB Currently the maximum HID report size which can be buffered by the kernel is 8KB. This is sufficient for the vast majority of HID devices on the market, as most HID reports are fairly small. However, some unusual devices such as the Elgate Stream Deck exist which use a report size slightly over 8KB for the image data that is sent to the device. Reports these large cannot be buffered by the regular HID subsystem currently, thus the only way to use such device is to bypass the HID subsystem entirely. This increases the maximum HID report size to 16KB, which should cover all sanely designed HID devices. Signed-off-by: Dean Camera Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index b079146850e6..c39d71eb1fd0 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -494,7 +494,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ +#define HID_MAX_BUFFER_SIZE 16384 /* 16kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 -- cgit v1.2.3 From fd3bb8f54a88107570334c156efb0c724a261003 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Fri, 27 Nov 2020 10:28:34 +0000 Subject: nvmem: core: Add support for keepout regions Introduce support into the nvmem core for arrays of register ranges that should not result in actual device access. For these regions a constant byte (repeated) is returned instead on read, and writes are quietly ignored and returned as successful. This is useful for instance if certain efuse regions are protected from access by Linux because they contain secret info to another part of the system (like an integrated modem). Signed-off-by: Evan Green Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20201127102837.19366-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 06409a6c40bc..e162b757b6d5 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -30,6 +30,19 @@ enum nvmem_type { #define NVMEM_DEVID_NONE (-1) #define NVMEM_DEVID_AUTO (-2) +/** + * struct nvmem_keepout - NVMEM register keepout range. + * + * @start: The first byte offset to avoid. + * @end: One beyond the last byte offset to avoid. + * @value: The byte to fill reads with for this region. + */ +struct nvmem_keepout { + unsigned int start; + unsigned int end; + unsigned char value; +}; + /** * struct nvmem_config - NVMEM device configuration * @@ -39,6 +52,8 @@ enum nvmem_type { * @owner: Pointer to exporter module. Used for refcounting. * @cells: Optional array of pre-defined NVMEM cells. * @ncells: Number of elements in cells. + * @keepout: Optional array of keepout ranges (sorted ascending by start). + * @nkeepout: Number of elements in the keepout array. * @type: Type of the nvmem storage * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. @@ -66,6 +81,8 @@ struct nvmem_config { struct gpio_desc *wp_gpio; const struct nvmem_cell_info *cells; int ncells; + const struct nvmem_keepout *keepout; + unsigned int nkeepout; enum nvmem_type type; bool read_only; bool root_only; -- cgit v1.2.3 From 24c8a743336a1fdf42c0c768b4435633069c6a39 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 30 Sep 2020 20:48:02 +0200 Subject: pcmcia: at91_cf: move definitions locally struct at91_cf_data is only used in the driver since all the platforms moved to device tree, move its definition locally. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200930184804.3127757-1-alexandre.belloni@bootlin.com --- include/linux/platform_data/atmel.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 99e6069c5fd8..73f63be509c4 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -6,18 +6,6 @@ #ifndef __ATMEL_H__ #define __ATMEL_H__ - /* Compact Flash */ -struct at91_cf_data { - int irq_pin; /* I/O IRQ */ - int det_pin; /* Card detect */ - int vcc_pin; /* power switching */ - int rst_pin; /* card reset */ - u8 chipselect; /* EBI Chip Select number */ - u8 flags; -#define AT91_CF_TRUE_IDE 0x01 -#define AT91_IDE_SWAP_A0_A2 0x02 -}; - /* FIXME: this needs a better location, but gets stuff building again */ #ifdef CONFIG_ATMEL_PM extern int at91_suspend_entering_slow_clock(void); -- cgit v1.2.3 From a69dcdfc2dd21f86cb1f79f98fc94c52f96cff64 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Mon, 2 Nov 2020 08:04:38 +0800 Subject: soc / drm: mediatek: cmdq: Remove timeout handler in helper function For each client driver, its timeout handler need to dump hardware register or its state machine information, and their way to detect timeout are also different, so remove timeout handler in helper function and let client driver implement its own timeout handler. Signed-off-by: Chun-Kuang Hu Acked-by: Matthias Brugger Link: https://lore.kernel.org/r/20201102000438.29225-1-chunkuang.hu@kernel.org Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-cmdq.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 960704d75994..8e9996610978 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -11,7 +11,6 @@ #include #include -#define CMDQ_NO_TIMEOUT 0xffffffffu #define CMDQ_ADDR_HIGH(addr) ((u32)(((addr) >> 16) & GENMASK(31, 0))) #define CMDQ_ADDR_LOW(addr) ((u16)(addr) | BIT(1)) @@ -24,12 +23,8 @@ struct cmdq_client_reg { }; struct cmdq_client { - spinlock_t lock; - u32 pkt_cnt; struct mbox_client client; struct mbox_chan *chan; - struct timer_list timer; - u32 timeout_ms; /* in unit of microsecond */ }; /** @@ -51,13 +46,10 @@ int cmdq_dev_get_client_reg(struct device *dev, * cmdq_mbox_create() - create CMDQ mailbox client and channel * @dev: device of CMDQ mailbox client * @index: index of CMDQ mailbox channel - * @timeout: timeout of a pkt execution by GCE, in unit of microsecond, set - * CMDQ_NO_TIMEOUT if a timer is not used. * * Return: CMDQ mailbox client pointer */ -struct cmdq_client *cmdq_mbox_create(struct device *dev, int index, - u32 timeout); +struct cmdq_client *cmdq_mbox_create(struct device *dev, int index); /** * cmdq_mbox_destroy() - destroy CMDQ mailbox client and channel -- cgit v1.2.3 From 51c0e618b219c025ddaaf14baea8942cb7e2105b Mon Sep 17 00:00:00 2001 From: Yongqiang Niu Date: Tue, 6 Oct 2020 21:33:17 +0200 Subject: soc / drm: mediatek: Move DDP component defines into mtk-mmsys.h MMSYS is the driver which controls the routing of these DDP components, so the definition of the mtk_ddp_comp_id enum should be placed in mtk-mmsys.h Signed-off-by: Yongqiang Niu Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chun-Kuang Hu Link: https://lore.kernel.org/r/20201006193320.405529-2-enric.balletbo@collabora.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 7bab5d9a3d31..2228bf6133da 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -9,6 +9,39 @@ enum mtk_ddp_comp_id; struct device; +enum mtk_ddp_comp_id { + DDP_COMPONENT_AAL0, + DDP_COMPONENT_AAL1, + DDP_COMPONENT_BLS, + DDP_COMPONENT_CCORR, + DDP_COMPONENT_COLOR0, + DDP_COMPONENT_COLOR1, + DDP_COMPONENT_DITHER, + DDP_COMPONENT_DPI0, + DDP_COMPONENT_DPI1, + DDP_COMPONENT_DSI0, + DDP_COMPONENT_DSI1, + DDP_COMPONENT_DSI2, + DDP_COMPONENT_DSI3, + DDP_COMPONENT_GAMMA, + DDP_COMPONENT_OD0, + DDP_COMPONENT_OD1, + DDP_COMPONENT_OVL0, + DDP_COMPONENT_OVL_2L0, + DDP_COMPONENT_OVL_2L1, + DDP_COMPONENT_OVL1, + DDP_COMPONENT_PWM0, + DDP_COMPONENT_PWM1, + DDP_COMPONENT_PWM2, + DDP_COMPONENT_RDMA0, + DDP_COMPONENT_RDMA1, + DDP_COMPONENT_RDMA2, + DDP_COMPONENT_UFOE, + DDP_COMPONENT_WDMA0, + DDP_COMPONENT_WDMA1, + DDP_COMPONENT_ID_MAX, +}; + void mtk_mmsys_ddp_connect(struct device *dev, enum mtk_ddp_comp_id cur, enum mtk_ddp_comp_id next); -- cgit v1.2.3 From 206e7383b34316cf56cdde96eab9d97e9a1dbd70 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 26 Nov 2020 11:20:35 +0100 Subject: bus: mhi: core: Indexed MHI controller name Today the MHI controller name is simply cloned from the underlying bus device (its parent), that gives the following device structure for e.g. a MHI/PCI controller: devices/pci0000:00/0000:00:01.2/0000:02:00.0/0000:02:00.0 devices/pci0000:00/0000:00:01.2/0000:02:00.0/0000:02:00.0/0000:02:00.0_IPCR ... That's quite misleading/confusing and can cause device registering issues because of duplicate dev name (e.g. if a PCI device register two different MHI instances). This patch changes MHI core to create indexed mhi controller names (mhi0, mhi1...) in the same way as other busses (i2c0, usb0...). The previous example becomes: devices/pci0000:00/0000:00:01.2/0000:02:00.0/mhi0 devices/pci0000:00/0000:00:01.2/0000:02:00.0/mhi0/mhi0_IPCR ... v2: move index field at the end of mhi_controller struct (before bool) to avoid breaking well packed alignment. Signed-off-by: Loic Poulain Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d31efcf02ae7..aa9757e71f1f 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -348,6 +348,7 @@ struct mhi_controller_config { * @read_reg: Read a MHI register via the physical link (required) * @write_reg: Write a MHI register via the physical link (required) * @buffer_len: Bounce buffer length + * @index: Index of the MHI controller instance * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) * @pre_init: MHI host needs to do pre-initialization before power up @@ -438,6 +439,7 @@ struct mhi_controller { u32 val); size_t buffer_len; + int index; bool bounce_buf; bool fbc_download; bool pre_init; -- cgit v1.2.3 From 7776bcd241e08e13ef009926c6dea84dc3b2f8ff Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 3 Nov 2020 00:48:44 +0100 Subject: power: supply: s3c-adc-battery: Convert to GPIO descriptors This converts the S3C ADC battery to use GPIO descriptors instead of a global GPIO number for the charging completed GPIO. Using the pattern from the GPIO charger we name this GPIO line "charge-status" in the board file. Cc: linux-samsung-soc@vger.kernel.org Cc: Sergiy Kibrik Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/s3c_adc_battery.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/s3c_adc_battery.h b/include/linux/s3c_adc_battery.h index 833871dcf6fd..57f982c375f8 100644 --- a/include/linux/s3c_adc_battery.h +++ b/include/linux/s3c_adc_battery.h @@ -14,9 +14,6 @@ struct s3c_adc_bat_pdata { void (*enable_charger)(void); void (*disable_charger)(void); - int gpio_charge_finished; - int gpio_inverted; - const struct s3c_adc_bat_thresh *lut_noac; unsigned int lut_noac_cnt; const struct s3c_adc_bat_thresh *lut_acin; -- cgit v1.2.3 From b0327ffb133fb2148fc3bc2afb39af2871ab21cb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 30 Oct 2020 13:24:24 +0100 Subject: power: supply: generic-adc-battery: Use GPIO descriptors This driver uses platform data to pass GPIO lines using the deprecated global GPIO numbers. There are no in-tree users of this platform data. Any out-of-tree or coming users of this driver can easily be migrated to use machine descriptor tables as described in Documentation/driver-api/gpio/board.rst section "platform data". Cc: Anish Kumar Cc: H. Nikolaus Schaller Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- include/linux/power/generic-adc-battery.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/generic-adc-battery.h b/include/linux/power/generic-adc-battery.h index 40f9c7628f7b..c68cbf34cd34 100644 --- a/include/linux/power/generic-adc-battery.h +++ b/include/linux/power/generic-adc-battery.h @@ -11,16 +11,12 @@ * @battery_info: recommended structure to specify static power supply * parameters * @cal_charge: calculate charge level. - * @gpio_charge_finished: gpio for the charger. - * @gpio_inverted: Should be 1 if the GPIO is active low otherwise 0 * @jitter_delay: delay required after the interrupt to check battery * status.Default set is 10ms. */ struct gab_platform_data { struct power_supply_info battery_info; int (*cal_charge)(long value); - int gpio_charge_finished; - bool gpio_inverted; int jitter_delay; }; -- cgit v1.2.3 From 51e7bf4534da678da27c0f51e7ff21804fae88ca Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:05 +0200 Subject: mtd: nand: ecc: Add an I/O request tweaking mechanism Currently, BCH and Hamming engine are sharing the same tweaking/restoring I/O mechanism: they need the I/O request to fully cover the main/OOB area. Let's make this code generic as sharing the code between two drivers is already a win. Maybe other ECC engine drivers will need it too. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-2-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 697ea2474a7c..36e4fe08d0ea 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,38 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +/** + * struct nand_ecc_req_tweak_ctx - Help for automatically tweaking requests + * @orig_req: Pointer to the original IO request + * @nand: Related NAND device, to have access to its memory organization + * @page_buffer_size: Real size of the page buffer to use (can be set by the + * user before the tweaking mechanism initialization) + * @oob_buffer_size: Real size of the OOB buffer to use (can be set by the + * user before the tweaking mechanism initialization) + * @spare_databuf: Data bounce buffer + * @spare_oobbuf: OOB bounce buffer + * @bounce_data: Flag indicating a data bounce buffer is used + * @bounce_oob: Flag indicating an OOB bounce buffer is used + */ +struct nand_ecc_req_tweak_ctx { + struct nand_page_io_req orig_req; + struct nand_device *nand; + unsigned int page_buffer_size; + unsigned int oob_buffer_size; + void *spare_databuf; + void *spare_oobbuf; + bool bounce_data; + bool bounce_oob; +}; + +int nand_ecc_init_req_tweaking(struct nand_ecc_req_tweak_ctx *ctx, + struct nand_device *nand); +void nand_ecc_cleanup_req_tweaking(struct nand_ecc_req_tweak_ctx *ctx); +void nand_ecc_tweak_req(struct nand_ecc_req_tweak_ctx *ctx, + struct nand_page_io_req *req); +void nand_ecc_restore_req(struct nand_ecc_req_tweak_ctx *ctx, + struct nand_page_io_req *req); + /** * struct nand_ecc - Information relative to the ECC * @defaults: Default values, depend on the underlying subsystem -- cgit v1.2.3 From cdbe8df5e28e452c232c0c16b205edfd390d28e5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:06 +0200 Subject: mtd: nand: ecc-bch: Move BCH code to the generic NAND layer BCH ECC code might be later re-used by the SPI NAND layer. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-3-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 66 +++++++++++++++++++++++++++++++++++++ include/linux/mtd/nand_bch.h | 66 ------------------------------------- 2 files changed, 66 insertions(+), 66 deletions(-) create mode 100644 include/linux/mtd/nand-ecc-sw-bch.h delete mode 100644 include/linux/mtd/nand_bch.h (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h new file mode 100644 index 000000000000..1e1ee3af82b1 --- /dev/null +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2011 Ivan Djelic + * + * This file is the header for the NAND BCH ECC implementation. + */ + +#ifndef __MTD_NAND_ECC_SW_BCH_H__ +#define __MTD_NAND_ECC_SW_BCH_H__ + +struct mtd_info; +struct nand_chip; +struct nand_bch_control; + +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) + +static inline int mtd_nand_has_bch(void) { return 1; } + +/* + * Calculate BCH ecc code + */ +int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, + u_char *ecc_code); + +/* + * Detect and correct bit errors + */ +int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, + u_char *read_ecc, u_char *calc_ecc); +/* + * Initialize BCH encoder/decoder + */ +struct nand_bch_control *nand_bch_init(struct mtd_info *mtd); +/* + * Release BCH encoder/decoder resources + */ +void nand_bch_free(struct nand_bch_control *nbc); + +#else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ + +static inline int mtd_nand_has_bch(void) { return 0; } + +static inline int +nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, + u_char *ecc_code) +{ + return -1; +} + +static inline int +nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, + unsigned char *read_ecc, unsigned char *calc_ecc) +{ + return -ENOTSUPP; +} + +static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) +{ + return NULL; +} + +static inline void nand_bch_free(struct nand_bch_control *nbc) {} + +#endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ + +#endif /* __MTD_NAND_ECC_SW_BCH_H__ */ diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h deleted file mode 100644 index d5956cc48ba9..000000000000 --- a/include/linux/mtd/nand_bch.h +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2011 Ivan Djelic - * - * This file is the header for the NAND BCH ECC implementation. - */ - -#ifndef __MTD_NAND_BCH_H__ -#define __MTD_NAND_BCH_H__ - -struct mtd_info; -struct nand_chip; -struct nand_bch_control; - -#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) - -static inline int mtd_nand_has_bch(void) { return 1; } - -/* - * Calculate BCH ecc code - */ -int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct bit errors - */ -int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, - u_char *read_ecc, u_char *calc_ecc); -/* - * Initialize BCH encoder/decoder - */ -struct nand_bch_control *nand_bch_init(struct mtd_info *mtd); -/* - * Release BCH encoder/decoder resources - */ -void nand_bch_free(struct nand_bch_control *nbc); - -#else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ - -static inline int mtd_nand_has_bch(void) { return 0; } - -static inline int -nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code) -{ - return -1; -} - -static inline int -nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, - unsigned char *read_ecc, unsigned char *calc_ecc) -{ - return -ENOTSUPP; -} - -static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) -{ - return NULL; -} - -static inline void nand_bch_free(struct nand_bch_control *nbc) {} - -#endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ - -#endif /* __MTD_NAND_BCH_H__ */ -- cgit v1.2.3 From 3c0fe36abebee55821badaa9d6cecd03799f7843 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:08 +0200 Subject: mtd: nand: ecc-bch: Stop exporting the private structure The NAND BCH control structure has nothing to do outside of this driver, all users of the nand_bch_init/free() functions just save it to chip->ecc.priv so do it in this driver directly and return a regular error code instead. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-5-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index 1e1ee3af82b1..b62b8bd4669f 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -10,7 +10,6 @@ struct mtd_info; struct nand_chip; -struct nand_bch_control; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) @@ -30,11 +29,11 @@ int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, /* * Initialize BCH encoder/decoder */ -struct nand_bch_control *nand_bch_init(struct mtd_info *mtd); +int nand_bch_init(struct nand_chip *chip); /* * Release BCH encoder/decoder resources */ -void nand_bch_free(struct nand_bch_control *nbc); +void nand_bch_free(struct nand_chip *chip); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ @@ -54,12 +53,12 @@ nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, return -ENOTSUPP; } -static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) +static inline int nand_bch_init(struct nand_chip *chip) { - return NULL; + return -ENOTSUPP; } -static inline void nand_bch_free(struct nand_bch_control *nbc) {} +static inline void nand_bch_free(struct nand_chip *chip) {} #endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ -- cgit v1.2.3 From e3010bd3ef1eda13f08155fe43846a64d0990a86 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:09 +0200 Subject: mtd: nand: ecc-bch: Return only valid error codes When a function is not available, returning -ENOTSUPP makes much more sense than returning -1. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-6-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index b62b8bd4669f..d92d2abcfcef 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -43,7 +43,7 @@ static inline int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, u_char *ecc_code) { - return -1; + return -ENOTSUPP; } static inline int -- cgit v1.2.3 From 127aae6077562e3926ebad7c782123c2afe95846 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:10 +0200 Subject: mtd: nand: ecc-bch: Drop mtd_nand_has_bch() Like for any other compilation option, use the IS_ENABLED() macro instead of hardcoding it. By droping this helper we can get rid of the BCH header in nandsim.c. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-7-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index d92d2abcfcef..7b62996d9e01 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -13,8 +13,6 @@ struct nand_chip; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) -static inline int mtd_nand_has_bch(void) { return 1; } - /* * Calculate BCH ecc code */ @@ -37,8 +35,6 @@ void nand_bch_free(struct nand_chip *chip); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ -static inline int mtd_nand_has_bch(void) { return 0; } - static inline int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, u_char *ecc_code) -- cgit v1.2.3 From ea146d7fbf5081b5eb2777df5e30ed70ca68985b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:11 +0200 Subject: mtd: nand: ecc-bch: Update the prototypes to be more generic These functions must be usable by the main NAND core, so their names must be technology-agnostic as well as the parameters. Hence, we pass a generic nand_device instead of a raw nand_chip structure. As it seems that changing the raw NAND functions to always pass a generic NAND device is a lost of time, we prefer to create dedicated raw NAND wrappers that will be useful in the near future to do the translation. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-8-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 45 +++++++++++++------------------------ include/linux/mtd/rawnand.h | 5 +++++ 2 files changed, 21 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index 7b62996d9e01..f0caee3b03d0 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -8,53 +8,40 @@ #ifndef __MTD_NAND_ECC_SW_BCH_H__ #define __MTD_NAND_ECC_SW_BCH_H__ -struct mtd_info; -struct nand_chip; +#include #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) -/* - * Calculate BCH ecc code - */ -int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct bit errors - */ -int nand_bch_correct_data(struct nand_chip *chip, u_char *dat, - u_char *read_ecc, u_char *calc_ecc); -/* - * Initialize BCH encoder/decoder - */ -int nand_bch_init(struct nand_chip *chip); -/* - * Release BCH encoder/decoder resources - */ -void nand_bch_free(struct nand_chip *chip); +int nand_ecc_sw_bch_calculate(struct nand_device *nand, + const unsigned char *buf, unsigned char *code); +int nand_ecc_sw_bch_correct(struct nand_device *nand, unsigned char *buf, + unsigned char *read_ecc, unsigned char *calc_ecc); +int nand_ecc_sw_bch_init(struct nand_device *nand); +void nand_ecc_sw_bch_cleanup(struct nand_device *nand); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ -static inline int -nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code) +static inline int nand_ecc_sw_bch_calculate(struct nand_device *nand, + const unsigned char *buf, + unsigned char *code) { return -ENOTSUPP; } -static inline int -nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, - unsigned char *read_ecc, unsigned char *calc_ecc) +static inline int nand_ecc_sw_bch_correct(struct nand_device *nand, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) { return -ENOTSUPP; } -static inline int nand_bch_init(struct nand_chip *chip) +static inline int nand_ecc_sw_bch_init(struct nand_device *nand) { return -ENOTSUPP; } -static inline void nand_bch_free(struct nand_chip *chip) {} +static inline void nand_ecc_sw_bch_cleanup(struct nand_device *nand) {} #endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index aac07940de09..23623beaad1d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1303,6 +1303,11 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } +int rawnand_sw_bch_init(struct nand_chip *chip); +int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, + unsigned char *read_ecc, unsigned char *calc_ecc); +void rawnand_sw_bch_cleanup(struct nand_chip *chip); + int nand_check_erased_ecc_chunk(void *data, int datalen, void *ecc, int ecclen, void *extraoob, int extraooblen, -- cgit v1.2.3 From 80fe603160a4732a08f0f08f3e3312a3f3a79eee Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:12 +0200 Subject: mtd: nand: ecc-bch: Stop using raw NAND structures This code is meant to be reused by the SPI-NAND core. Now that the driver has been cleaned and reorganized, use a generic ECC engine object to store the driver's data instead of accessing members of the nand_chip structure. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-9-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index f0caee3b03d0..ce005528e55f 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -9,6 +9,31 @@ #define __MTD_NAND_ECC_SW_BCH_H__ #include +#include + +/** + * struct nand_ecc_sw_bch_conf - private software BCH ECC engine structure + * @reqooblen: Save the actual user OOB length requested before overwriting it + * @spare_oobbuf: Spare OOB buffer if none is provided + * @code_size: Number of bytes needed to store a code (one code per step) + * @nsteps: Number of steps + * @calc_buf: Buffer to use when calculating ECC bytes + * @code_buf: Buffer to use when reading (raw) ECC bytes from the chip + * @bch: BCH control structure + * @errloc: error location array + * @eccmask: XOR ecc mask, allows erased pages to be decoded as valid + */ +struct nand_ecc_sw_bch_conf { + unsigned int reqooblen; + void *spare_oobbuf; + unsigned int code_size; + unsigned int nsteps; + u8 *calc_buf; + u8 *code_buf; + struct bch_control *bch; + unsigned int *errloc; + unsigned char *eccmask; +}; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) -- cgit v1.2.3 From 57e3cebd022fbc035dcf190ac789fd2ffc747f5b Mon Sep 17 00:00:00 2001 From: Shenming Lu Date: Sat, 28 Nov 2020 22:18:57 +0800 Subject: KVM: arm64: Delay the polling of the GICR_VPENDBASER.Dirty bit In order to reduce the impact of the VPT parsing happening on the GIC, we can split the vcpu reseidency in two phases: - programming GICR_VPENDBASER: this still happens in vcpu_load() - checking for the VPT parsing to be complete: this can happen on vcpu entry (in kvm_vgic_flush_hwstate()) This allows the GIC and the CPU to work in parallel, rewmoving some of the entry overhead. Suggested-by: Marc Zyngier Signed-off-by: Shenming Lu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201128141857.983-3-lushenming@huawei.com --- include/linux/irqchip/arm-gic-v4.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6976b8331b60..943c3411ca10 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { irq_hw_number_t vpe_db_lpi; /* VPE resident */ bool resident; + /* VPT parse complete */ + bool ready; union { /* GICv4.0 implementations */ struct { @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { PROP_UPDATE_AND_INV_VLPI, SCHEDULE_VPE, DESCHEDULE_VPE, + COMMIT_VPE, INVALL_VPE, PROP_UPDATE_VSGI, }; @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); +int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); -- cgit v1.2.3 From bb4c6910c8b41623104c2e64a30615682689a54d Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 26 Nov 2020 09:28:51 +0100 Subject: genirq/irqdomain: Add an irq_create_mapping_affinity() function There is currently no way to convey the affinity of an interrupt via irq_create_mapping(), which creates issues for devices that expect that affinity to be managed by the kernel. In order to sort this out, rename irq_create_mapping() to irq_create_mapping_affinity() with an additional affinity parameter that can be passed down to irq_domain_alloc_descs(). irq_create_mapping() is re-implemented as a wrapper around irq_create_mapping_affinity(). No functional change. Fixes: e75eafb9b039 ("genirq/msi: Switch to new irq spreading infrastructure") Signed-off-by: Laurent Vivier Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kurz Cc: Michael Ellerman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201126082852.1178497-2-lvivier@redhat.com --- include/linux/irqdomain.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 71535e87109f..ea5a337e0f8b 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -384,11 +384,19 @@ extern void irq_domain_associate_many(struct irq_domain *domain, extern void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq); -extern unsigned int irq_create_mapping(struct irq_domain *host, - irq_hw_number_t hwirq); +extern unsigned int irq_create_mapping_affinity(struct irq_domain *host, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity); extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); extern void irq_dispose_mapping(unsigned int virq); +static inline unsigned int irq_create_mapping(struct irq_domain *host, + irq_hw_number_t hwirq) +{ + return irq_create_mapping_affinity(host, hwirq, NULL); +} + + /** * irq_linear_revmap() - Find a linux irq from a hw irq number. * @domain: domain owning this hardware interrupt -- cgit v1.2.3 From 63e2fffa59a9dd91e443b08832656399fd80b7f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 15 Nov 2020 17:37:37 -0500 Subject: pNFS/flexfiles: Fix array overflow when flexfiles mirroring is enabled If the flexfiles mirroring is enabled, then the read code expects to be able to set pgio->pg_mirror_idx to point to the data server that is being used for this particular read. However it does not change the pg_mirror_count because we only need to send a single read. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index c32c15216da3..f0373a6cb5fb 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -55,6 +55,7 @@ struct nfs_page { unsigned short wb_nio; /* Number of I/O attempts */ }; +struct nfs_pgio_mirror; struct nfs_pageio_descriptor; struct nfs_pageio_ops { void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); @@ -64,6 +65,9 @@ struct nfs_pageio_ops { unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, struct nfs_page *); void (*pg_cleanup)(struct nfs_pageio_descriptor *); + struct nfs_pgio_mirror * + (*pg_get_mirror)(struct nfs_pageio_descriptor *, u32); + u32 (*pg_set_mirror)(struct nfs_pageio_descriptor *, u32); }; struct nfs_rw_ops { -- cgit v1.2.3 From 5a7e702670adc368caa1b64c2138956d8cba0d4f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:09 -0400 Subject: SUNRPC: Adjust synopsis of xdr_buf_subsegment() Clean up: This enables xdr_buf_subsegment()'s callers to pass in a const pointer to that buffer. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9548d075e06d..ec2a22ccdc2a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -183,7 +183,8 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) */ extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf, + unsigned int base, unsigned int len); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -- cgit v1.2.3 From 03493bca084fdca48abc59b00e06ce733aa9eb7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 10 Jun 2020 10:36:42 -0400 Subject: SUNRPC: Rename svc_encode_read_payload() Clean up: "result payload" is a less confusing name for these payloads. "READ payload" reflects only the NFS usage. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 6 +++--- include/linux/sunrpc/svc_rdma.h | 4 ++-- include/linux/sunrpc/svc_xprt.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 386628b36bc7..c220b734fa69 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -519,9 +519,9 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); -int svc_encode_read_payload(struct svc_rqst *rqstp, - unsigned int offset, - unsigned int length); +int svc_encode_result_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, struct kvec *first, size_t total); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 9dc3a3b88391..2b870a3f391b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -207,8 +207,8 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *rctxt, int status); extern int svc_rdma_sendto(struct svc_rqst *); -extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length); +extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length); /* svc_rdma_transport.c */ extern struct svc_xprt_class svc_rdma_class; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index aca35ab5cff2..92455e0d5244 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -21,8 +21,8 @@ struct svc_xprt_ops { int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_recvfrom)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *); - int (*xpo_read_payload)(struct svc_rqst *, unsigned int, - unsigned int); + int (*xpo_result_payload)(struct svc_rqst *, unsigned int, + unsigned int); void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); -- cgit v1.2.3 From f6ad77590a5d432589a5d8a211c4e8e50cd8bb63 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:10 -0400 Subject: svcrdma: Post RDMA Writes while XDR encoding replies The only RPC/RDMA ordering requirement between RDMA Writes and RDMA Sends is that the responder must post the Writes on the Send queue before posting the Send that conveys the RPC Reply for that Write payload. The Linux NFS server implementation now has a transport method that can post result Payload Writes earlier than svc_rdma_sendto: ->xpo_result_payload() This gets RDMA Writes going earlier so they are more likely to be complete at the remote end before the Send completes. Some care must be taken with pulled-up Replies. We don't want to push the Write chunk and then send the same payload data via Send. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 2b870a3f391b..f5a3c852bb90 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -183,9 +183,7 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, struct xdr_buf *xdr, - unsigned int offset, - unsigned long length); + __be32 *wr_ch, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); -- cgit v1.2.3 From 78147ca8b4a9b6cf0e597ddd6bf17959e08376c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Jun 2020 10:15:41 -0400 Subject: svcrdma: Add a "parsed chunk list" data structure This simple data structure binds the location of each data payload inside of an RPC message to the chunk that will be used to push it to or pull it from the client. There are several benefits to this small additional overhead: * It enables support for more than one chunk in incoming Read and Write lists. * It translates the version-specific on-the-wire format into a generic in-memory structure, enabling support for multiple versions of the RPC/RDMA transport protocol. * It enables the server to re-organize a chunk list if it needs to adjust where Read chunk data lands in server memory without altering the contents of the XDR-encoded Receive buffer. Construction of these lists is done while sanity checking each incoming RPC/RDMA header. Subsequent patches will make use of the generated data structures. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 12 ++++ include/linux/sunrpc/svc_rdma_pcl.h | 128 ++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 include/linux/sunrpc/svc_rdma_pcl.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f5a3c852bb90..a89d4209fe2a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -47,6 +47,8 @@ #include #include #include +#include + #include #include @@ -142,8 +144,18 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + + struct svc_rdma_pcl rc_call_pcl; + + struct svc_rdma_pcl rc_read_pcl; + __be32 *rc_write_list; + struct svc_rdma_chunk *rc_cur_result_payload; + struct svc_rdma_pcl rc_write_pcl; + __be32 *rc_reply_chunk; + struct svc_rdma_pcl rc_reply_pcl; + unsigned int rc_read_payload_offset; unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h new file mode 100644 index 000000000000..7516ad0fae80 --- /dev/null +++ b/include/linux/sunrpc/svc_rdma_pcl.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Oracle and/or its affiliates + */ + +#ifndef SVC_RDMA_PCL_H +#define SVC_RDMA_PCL_H + +#include + +struct svc_rdma_segment { + u32 rs_handle; + u32 rs_length; + u64 rs_offset; +}; + +struct svc_rdma_chunk { + struct list_head ch_list; + + u32 ch_position; + u32 ch_length; + u32 ch_payload_length; + + u32 ch_segcount; + struct svc_rdma_segment ch_segments[]; +}; + +struct svc_rdma_pcl { + unsigned int cl_count; + struct list_head cl_chunks; +}; + +/** + * pcl_init - Initialize a parsed chunk list + * @pcl: parsed chunk list to initialize + * + */ +static inline void pcl_init(struct svc_rdma_pcl *pcl) +{ + INIT_LIST_HEAD(&pcl->cl_chunks); +} + +/** + * pcl_is_empty - Return true if parsed chunk list is empty + * @pcl: parsed chunk list + * + */ +static inline bool pcl_is_empty(const struct svc_rdma_pcl *pcl) +{ + return list_empty(&pcl->cl_chunks); +} + +/** + * pcl_first_chunk - Return first chunk in a parsed chunk list + * @pcl: parsed chunk list + * + * Returns the first chunk in the list, or NULL if the list is empty. + */ +static inline struct svc_rdma_chunk * +pcl_first_chunk(const struct svc_rdma_pcl *pcl) +{ + if (pcl_is_empty(pcl)) + return NULL; + return list_first_entry(&pcl->cl_chunks, struct svc_rdma_chunk, + ch_list); +} + +/** + * pcl_next_chunk - Return next chunk in a parsed chunk list + * @pcl: a parsed chunk list + * @chunk: chunk in @pcl + * + * Returns the next chunk in the list, or NULL if @chunk is already last. + */ +static inline struct svc_rdma_chunk * +pcl_next_chunk(const struct svc_rdma_pcl *pcl, struct svc_rdma_chunk *chunk) +{ + if (list_is_last(&chunk->ch_list, &pcl->cl_chunks)) + return NULL; + return list_next_entry(chunk, ch_list); +} + +/** + * pcl_for_each_chunk - Iterate over chunks in a parsed chunk list + * @pos: the loop cursor + * @pcl: a parsed chunk list + */ +#define pcl_for_each_chunk(pos, pcl) \ + for (pos = list_first_entry(&(pcl)->cl_chunks, struct svc_rdma_chunk, ch_list); \ + &pos->ch_list != &(pcl)->cl_chunks; \ + pos = list_next_entry(pos, ch_list)) + +/** + * pcl_for_each_segment - Iterate over segments in a parsed chunk + * @pos: the loop cursor + * @chunk: a parsed chunk + */ +#define pcl_for_each_segment(pos, chunk) \ + for (pos = &(chunk)->ch_segments[0]; \ + pos <= &(chunk)->ch_segments[(chunk)->ch_segcount - 1]; \ + pos++) + +/** + * pcl_chunk_end_offset - Return offset of byte range following @chunk + * @chunk: chunk in @pcl + * + * Returns starting offset of the region just after @chunk + */ +static inline unsigned int +pcl_chunk_end_offset(const struct svc_rdma_chunk *chunk) +{ + return xdr_align_size(chunk->ch_position + chunk->ch_payload_length); +} + +struct svc_rdma_recv_ctxt; + +extern void pcl_free(struct svc_rdma_pcl *pcl); +extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_pcl *pcl, __be32 *p); +extern int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl, + const struct xdr_buf *xdr, + int (*actor)(const struct xdr_buf *, + void *), + void *data); + +#endif /* SVC_RDMA_PCL_H */ -- cgit v1.2.3 From 58b2e0fefa891c99f297120c8c062a35005dc562 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 22 Mar 2020 13:06:55 -0400 Subject: svcrdma: Use parsed chunk lists to detect reverse direction replies Refactor: Don't duplicate header decoding smarts here. Instead, use the new parsed chunk lists. Note that the XID sanity test is also removed. The XID is already looked up by the cb handler, and is rejected if it's not recognized. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a89d4209fe2a..74247a33b6c6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -144,6 +144,7 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + __be32 rc_msgtype; struct svc_rdma_pcl rc_call_pcl; -- cgit v1.2.3 From 7a1cbfa18059a40d4752dab057384c3ca2de326c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 17 Jun 2020 11:07:00 -0400 Subject: svcrdma: Use parsed chunk lists to construct RDMA Writes Refactor: Instead of re-parsing the ingress RPC Call transport header when constructing RDMA Writes, use the new parsed chunk lists for the Write list and Reply chunk, which are version-agnostic and already XDR-decoded. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 74247a33b6c6..d9148787efff 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -157,8 +157,6 @@ struct svc_rdma_recv_ctxt { __be32 *rc_reply_chunk; struct svc_rdma_pcl rc_reply_pcl; - unsigned int rc_read_payload_offset; - unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; }; @@ -196,7 +194,8 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, const struct xdr_buf *xdr); + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); -- cgit v1.2.3 From 9d0b09d5ef0c842592a5df3a5b8b59124485ff1b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 13 Mar 2020 10:42:11 -0400 Subject: svcrdma: Support multiple write chunks when pulling up When counting the number of SGEs needed to construct a Send request, do not count result payloads. And, when copying the Reply message into the pull-up buffer, result payloads are not to be copied to the Send buffer. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d9148787efff..7090af1a9791 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -182,6 +182,8 @@ extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); +extern struct svc_rdma_recv_ctxt * + svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt); extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From 2371bcc056647327445150d6df0502d92ad68439 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 Mar 2020 13:29:27 -0400 Subject: svcrdma: Support multiple Write chunks in svc_rdma_map_reply_msg() Refactor: svc_rdma_map_reply_msg() is restructured to DMA map only the parts of rq_res that do not contain a result payload. This change has been tested to confirm that it does not cause a regression in the no Write chunk and single Write chunk cases. Multiple Write chunks have not been tested. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7090af1a9791..e09fafba00d7 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -213,7 +213,7 @@ extern int svc_rdma_send(struct svcxprt_rdma *rdma, extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); + const struct xdr_buf *xdr); extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, struct svc_rdma_recv_ctxt *rctxt, -- cgit v1.2.3 From 41bc163ffe0fe67cba3fff2f5e8c58caa9e46a1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 9 Mar 2020 13:29:28 -0400 Subject: svcrdma: Support multiple Write chunks in svc_rdma_send_reply_chunk Refactor svc_rdma_send_reply_chunk() so that it Sends only the parts of rq_res that do not contain a result payload. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e09fafba00d7..85fbec47d4b5 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -200,7 +200,7 @@ extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, - struct xdr_buf *xdr); + const struct xdr_buf *xdr); /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From 7954c8503b8709660d93505a40f1847634d9c3ba Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 17 Jun 2020 15:19:36 -0400 Subject: svcrdma: Remove chunk list pointers Clean up: These pointers are no longer used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 85fbec47d4b5..6f247d043731 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -149,12 +149,8 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_pcl rc_call_pcl; struct svc_rdma_pcl rc_read_pcl; - - __be32 *rc_write_list; struct svc_rdma_chunk *rc_cur_result_payload; struct svc_rdma_pcl rc_write_pcl; - - __be32 *rc_reply_chunk; struct svc_rdma_pcl rc_reply_pcl; struct page *rc_pages[RPCSVC_MAXPAGES]; -- cgit v1.2.3 From d96962e6d0e281bab6a48e83b42f5dce6eb28bf4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Sep 2020 13:04:17 -0400 Subject: svcrdma: Use the new parsed chunk list when pulling Read chunks As a pre-requisite for handling multiple Read chunks in each Read list, convert svc_rdma_recv_read_chunk() to use the new parsed Read chunk list. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 6f247d043731..294b56e61522 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -188,15 +188,15 @@ extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, - struct svc_rqst *rqstp, - struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_chunk *chunk, const struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_recv_ctxt *rctxt, const struct xdr_buf *xdr); +extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head); /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From 0ae4c3e8a64ace1b8d7de033b0751afe43024416 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Nov 2020 15:52:47 -0500 Subject: SUNRPC: Add xdr_set_scratch_page() and xdr_reset_scratch_buffer() Clean up: De-duplicate some frequently-used code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ec2a22ccdc2a..2729d2d6efce 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -248,7 +248,6 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); -extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); @@ -256,6 +255,49 @@ extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned in extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +/** + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +static inline void +xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} + +/** + * xdr_set_scratch_page - Attach a scratch buffer for decoding data + * @xdr: pointer to xdr_stream struct + * @page: an anonymous page + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) +{ + xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); +} + +/** + * xdr_reset_scratch_buffer - Clear scratch buffer information + * @xdr: pointer to xdr_stream struct + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_reset_scratch_buffer(struct xdr_stream *xdr) +{ + xdr_set_scratch_buffer(xdr, NULL, 0); +} + /** * xdr_stream_remaining - Return the number of bytes remaining in the stream * @xdr: pointer to struct xdr_stream -- cgit v1.2.3 From 5191955d6fc65e6d4efe8f4f10a6028298f57281 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 5 Nov 2020 11:19:42 -0500 Subject: SUNRPC: Prepare for xdr_stream-style decoding on the server-side A "permanent" struct xdr_stream is allocated in struct svc_rqst so that it is usable by all server-side decoders. A per-rqst scratch buffer is also allocated to handle decoding XDR data items that cross page boundaries. To demonstrate how it will be used, add the first call site for the new svcxdr_init_decode() API. As an additional part of the overall conversion, add symbolic constants for successful and failed XDR operations. Returning "0" is overloaded. Sometimes it means something failed, but sometimes it means success. To make it more clear when XDR decoding functions succeed or fail, introduce symbolic constants. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c220b734fa69..34c2a69820e9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -247,6 +247,8 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; + struct xdr_stream rq_arg_stream; + struct page *rq_scratch_page; struct xdr_buf rq_res; struct page *rq_pages[RPCSVC_MAXPAGES + 1]; struct page * *rq_respages; /* points into rq_pages */ @@ -557,4 +559,18 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) svc_reserve(rqstp, space + rqstp->rq_auth_slack); } +/** + * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_decode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct kvec *argv = rqstp->rq_arg.head; + + xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); + xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From c1346a1216ab5cb04a265380ac9035d91b16b6d5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 11:54:23 -0500 Subject: NFSD: Replace the internals of the READ_BUF() macro Convert the READ_BUF macro in nfs4xdr.c from open code to instead use the new xdr_stream-style decoders already in use by the encode side (and by the in-kernel NFS client implementation). Once this conversion is done, each individual NFSv4 argument decoder can be independently cleaned up to replace these macros with C code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2729d2d6efce..cc64cd0891f1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -254,6 +254,8 @@ extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, + unsigned int len); /** * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. -- cgit v1.2.3 From cbd9abb3706e96563b36af67595707a7054ab693 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Nov 2020 13:19:51 -0500 Subject: NFSD: Replace READ* macros in nfsd4_decode_commit() Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index cc64cd0891f1..cc669d95c484 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -571,6 +571,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) return 0; } +/** + * xdr_stream_decode_u64 - Decode a 64-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store 64-bit integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_hyper(p, ptr); + return 0; +} + /** * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 8918cc0d2b72db9997390626010b182c4500d749 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Nov 2020 17:16:52 -0500 Subject: NFSD: Add helper for decoding locker4 Refactor for clarity. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index cc669d95c484..9b35ce50cf2b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -550,6 +550,27 @@ static inline bool xdr_item_is_present(const __be32 *p) return *p != xdr_zero; } +/** + * xdr_stream_decode_bool - Decode a boolean + * @xdr: pointer to xdr_stream + * @ptr: pointer to a u32 in which to store the result + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = (*p != xdr_zero); + return 0; +} + /** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 7fd3253a7de6a317a0683f83739479fb880bffc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 30 Nov 2020 19:51:56 +0100 Subject: net: Introduce preferred busy-polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing busy-polling mode, enabled by the SO_BUSY_POLL socket option or system-wide using the /proc/sys/net/core/busy_read knob, is an opportunistic. That means that if the NAPI context is not scheduled, it will poll it. If, after busy-polling, the budget is exceeded the busy-polling logic will schedule the NAPI onto the regular softirq handling. One implication of the behavior above is that a busy/heavy loaded NAPI context will never enter/allow for busy-polling. Some applications prefer that most NAPI processing would be done by busy-polling. This series adds a new socket option, SO_PREFER_BUSY_POLL, that works in concert with the napi_defer_hard_irqs and gro_flush_timeout knobs. The napi_defer_hard_irqs and gro_flush_timeout knobs were introduced in commit 6f8b12d661d0 ("net: napi: add hard irqs deferral feature"), and allows for a user to defer interrupts to be enabled and instead schedule the NAPI context from a watchdog timer. When a user enables the SO_PREFER_BUSY_POLL, again with the other knobs enabled, and the NAPI context is being processed by a softirq, the softirq NAPI processing will exit early to allow the busy-polling to be performed. If the application stops performing busy-polling via a system call, the watchdog timer defined by gro_flush_timeout will timeout, and regular softirq handling will resume. In summary; Heavy traffic applications that prefer busy-polling over softirq processing should use this option. Example usage: $ echo 2 | sudo tee /sys/class/net/ens785f1/napi_defer_hard_irqs $ echo 200000 | sudo tee /sys/class/net/ens785f1/gro_flush_timeout Note that the timeout should be larger than the userspace processing window, otherwise the watchdog will timeout and fall back to regular softirq processing. Enable the SO_BUSY_POLL/SO_PREFER_BUSY_POLL options on your socket. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/20201130185205.196029-2-bjorn.topel@gmail.com --- include/linux/netdevice.h | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ce648a564f7..52d1cc2bd8a7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -350,23 +350,25 @@ struct napi_struct { }; enum { - NAPI_STATE_SCHED, /* Poll is scheduled */ - NAPI_STATE_MISSED, /* reschedule a napi */ - NAPI_STATE_DISABLE, /* Disable pending */ - NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ - NAPI_STATE_LISTED, /* NAPI added to system lists */ - NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ - NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ + NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_MISSED, /* reschedule a napi */ + NAPI_STATE_DISABLE, /* Disable pending */ + NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ + NAPI_STATE_LISTED, /* NAPI added to system lists */ + NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ + NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ }; enum { - NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), - NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), - NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), - NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), - NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), - NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), + NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), + NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), + NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), }; enum gro_result { @@ -437,6 +439,11 @@ static inline bool napi_disable_pending(struct napi_struct *n) return test_bit(NAPI_STATE_DISABLE, &n->state); } +static inline bool napi_prefer_busy_poll(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); +} + bool napi_schedule_prep(struct napi_struct *n); /** -- cgit v1.2.3 From 147ad605dc12c515c97136899ccb5c70e6c674e1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:16 +0100 Subject: init: use type alignment for kernel parameters Specify type alignment for kernel parameters instead of sizeof(long). The alignment attribute is used to prevent gcc from increasing the alignment of objects with static extent as an optimisation, something which would mess up the __setup array stride. Using __alignof__(struct obs_kernel_param) rather than sizeof(long) is preferred since it better indicates why it is there and doesn't break should the type size or alignment change. Note that on m68k the alignment of struct obs_kernel_param is actually two and that adding a 1- or 2-byte field to the 12-byte struct would cause a breakage with the current 4-byte alignment. Link: https://lore.kernel.org/lkml/20201103175711.10731-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/init.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 7b53cb3092ee..e668832ef66a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -255,7 +255,7 @@ struct obs_kernel_param { __aligned(1) = str; \ static struct obs_kernel_param __setup_##unique_id \ __used __section(".init.setup") \ - __attribute__((aligned((sizeof(long))))) \ + __aligned(__alignof__(struct obs_kernel_param)) \ = { __setup_str_##unique_id, fn, early } #define __setup(str, fn) \ -- cgit v1.2.3 From a2abe7cbd8fe2db5ff386c968e2273d9dc6c468d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 30 Nov 2020 15:34:41 -0800 Subject: scs: switch to vmapped shadow stacks The kernel currently uses kmem_cache to allocate shadow call stacks, which means an overflows may not be immediately detected and can potentially result in another task's shadow stack to be overwritten. This change switches SCS to use virtually mapped shadow stacks for tasks, which increases shadow stack size to a full page and provides more robust overflow detection, similarly to VMAP_STACK. Signed-off-by: Sami Tolvanen Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201130233442.2562064-2-samitolvanen@google.com Signed-off-by: Will Deacon --- include/linux/scs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scs.h b/include/linux/scs.h index 6dec390cf154..2a506c2a16f4 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -15,12 +15,8 @@ #ifdef CONFIG_SHADOW_CALL_STACK -/* - * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit - * architecture) provided ~40% safety margin on stack usage while keeping - * memory allocation overhead reasonable. - */ -#define SCS_SIZE SZ_1K +#define SCS_ORDER 0 +#define SCS_SIZE (PAGE_SIZE << SCS_ORDER) #define GFP_SCS (GFP_KERNEL | __GFP_ZERO) /* An illegal pointer value to mark the end of the shadow stack. */ @@ -33,6 +29,8 @@ #define task_scs(tsk) (task_thread_info(tsk)->scs_base) #define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) +void *scs_alloc(int node); +void scs_free(void *s); void scs_init(void); int scs_prepare(struct task_struct *tsk, int node); void scs_release(struct task_struct *tsk); @@ -61,6 +59,8 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk) #else /* CONFIG_SHADOW_CALL_STACK */ +static inline void *scs_alloc(int node) { return NULL; } +static inline void scs_free(void *s) {} static inline void scs_init(void) {} static inline void scs_task_reset(struct task_struct *tsk) {} static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } -- cgit v1.2.3 From ac20ffbb0279aae7be48567fb734eae7d050769e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 30 Nov 2020 15:34:42 -0800 Subject: arm64: scs: use vmapped IRQ and SDEI shadow stacks Use scs_alloc() to allocate also IRQ and SDEI shadow stacks instead of using statically allocated stacks. Signed-off-by: Sami Tolvanen Acked-by: Will Deacon Link: https://lore.kernel.org/r/20201130233442.2562064-3-samitolvanen@google.com [will: Move CONFIG_SHADOW_CALL_STACK check into init_irq_scs()] Signed-off-by: Will Deacon --- include/linux/scs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scs.h b/include/linux/scs.h index 2a506c2a16f4..18122d9e17ff 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -22,10 +22,6 @@ /* An illegal pointer value to mark the end of the shadow stack. */ #define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA) -/* Allocate a static per-CPU shadow stack */ -#define DEFINE_SCS(name) \ - DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \ - #define task_scs(tsk) (task_thread_info(tsk)->scs_base) #define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) -- cgit v1.2.3 From c860476b9e3a420192b28e580cb749e024d032eb Mon Sep 17 00:00:00 2001 From: Adam Ward Date: Mon, 30 Nov 2020 16:59:08 +0000 Subject: regulator: da9121: Add device variant regmaps Add ability to probe device and validate configuration, then apply a regmap configuration for a single or dual buck device accordingly. Signed-off-by: Adam Ward Link: https://lore.kernel.org/r/068c6b8d5e1b4e221e899e4c914c429429a2ec7d.1606755367.git.Adam.Ward.opensource@diasemi.com Signed-off-by: Mark Brown --- include/linux/regulator/da9121.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/regulator/da9121.h (limited to 'include/linux') diff --git a/include/linux/regulator/da9121.h b/include/linux/regulator/da9121.h new file mode 100644 index 000000000000..c31180d886cc --- /dev/null +++ b/include/linux/regulator/da9121.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * DA9121 Single-channel dual-phase 10A buck converter + * DA9130 Single-channel dual-phase 10A buck converter (Automotive) + * DA9217 Single-channel dual-phase 6A buck converter + * DA9122 Dual-channel single-phase 5A buck converter + * DA9131 Dual-channel single-phase 5A buck converter (Automotive) + * DA9220 Dual-channel single-phase 3A buck converter + * DA9132 Dual-channel single-phase 3A buck converter (Automotive) + * + * Copyright (C) 2020 Dialog Semiconductor + * + * Authors: Adam Ward, Dialog Semiconductor + */ + +#ifndef __LINUX_REGULATOR_DA9121_H +#define __LINUX_REGULATOR_DA9121_H + +enum { + DA9121_IDX_BUCK1, + DA9121_IDX_BUCK2, + DA9121_IDX_MAX +}; + +#endif -- cgit v1.2.3 From 46c413d5bb239769e6f1de706adf422c807c7a5f Mon Sep 17 00:00:00 2001 From: Adam Ward Date: Mon, 30 Nov 2020 16:59:10 +0000 Subject: regulator: da9121: Add support for device variants via devicetree Add devicetree configuration and device variant parameters. Use the latter to enable the check and use of parameters specific to dual buck variants. Signed-off-by: Adam Ward Link: https://lore.kernel.org/r/5849ce60595aef1018bdde7dcfb54a7397597545.1606755367.git.Adam.Ward.opensource@diasemi.com Signed-off-by: Mark Brown --- include/linux/regulator/da9121.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/da9121.h b/include/linux/regulator/da9121.h index c31180d886cc..62d9d257dc25 100644 --- a/include/linux/regulator/da9121.h +++ b/include/linux/regulator/da9121.h @@ -16,10 +16,21 @@ #ifndef __LINUX_REGULATOR_DA9121_H #define __LINUX_REGULATOR_DA9121_H +#include + +struct gpio_desc; + enum { DA9121_IDX_BUCK1, DA9121_IDX_BUCK2, DA9121_IDX_MAX }; +struct da9121_pdata { + int num_buck; + struct gpio_desc *gpiod_ren[DA9121_IDX_MAX]; + struct device_node *reg_node[DA9121_IDX_MAX]; + struct regulator_init_data *init_data[DA9121_IDX_MAX]; +}; + #endif -- cgit v1.2.3 From 57d9352b6c651b090179f8b223b6681275c64a4f Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Sun, 15 Nov 2020 11:51:18 -0800 Subject: fpga: fpga-mgr: Add devm_fpga_mgr_register() API Add a devm_fpga_mgr_register() API that can be used to register a FPGA Manager that was created using devm_fpga_mgr_create(). Introduce a struct fpga_mgr_devres that makes the devres allocation a little bit more readable and gets reused for devm_fpga_mgr_create() devm_fpga_mgr_register(). Reviewed-by: Tom Rix Signed-off-by: Moritz Fischer Link: https://lore.kernel.org/r/20201115195127.284487-2-mdf@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index e8ca62b2cb5b..2bc3030a69e5 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -198,6 +198,8 @@ void fpga_mgr_free(struct fpga_manager *mgr); int fpga_mgr_register(struct fpga_manager *mgr); void fpga_mgr_unregister(struct fpga_manager *mgr); +int devm_fpga_mgr_register(struct device *dev, struct fpga_manager *mgr); + struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name, const struct fpga_manager_ops *mops, void *priv); -- cgit v1.2.3 From fa69ee5aa48b5b52e8028c2eb486906e9998d081 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Nov 2020 23:48:40 +0100 Subject: net: switch to storing KCOV handle directly in sk_buff It turns out that usage of skb extensions can cause memory leaks. Ido Schimmel reported: "[...] there are instances that blindly overwrite 'skb->extensions' by invoking skb_copy_header() after __alloc_skb()." Therefore, give up on using skb extensions for KCOV handle, and instead directly store kcov_handle in sk_buff. Fixes: 6370cc3bbd8a ("net: add kcov handle to skb extensions") Fixes: 85ce50d337d1 ("net: kcov: don't select SKB_EXTENSIONS when there is no NET") Fixes: 97f53a08cba1 ("net: linux/skbuff.h: combine SKB_EXTENSIONS + KCOV handling") Link: https://lore.kernel.org/linux-wireless/20201121160941.GA485907@shredder.lan/ Reported-by: Ido Schimmel Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20201125224840.2014773-1-elver@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0a1239819fd2..333bcdc39635 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -701,6 +701,7 @@ typedef unsigned char *sk_buff_data_t; * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header + * @kcov_handle: KCOV remote handle for remote coverage collection * @tail: Tail pointer * @end: End pointer * @head: Head of buffer @@ -904,6 +905,10 @@ struct sk_buff { __u16 network_header; __u16 mac_header; +#ifdef CONFIG_KCOV + u64 kcov_handle; +#endif + /* private: */ __u32 headers_end[0]; /* public: */ @@ -4150,9 +4155,6 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, -#endif -#if IS_ENABLED(CONFIG_KCOV) - SKB_EXT_KCOV_HANDLE, #endif SKB_EXT_NUM, /* must be last */ }; @@ -4608,35 +4610,22 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } -#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { - /* Do not allocate skb extensions only to set kcov_handle to zero - * (as it is zero by default). However, if the extensions are - * already allocated, update kcov_handle anyway since - * skb_set_kcov_handle can be called to zero a previously set - * value. - */ - if (skb_has_extensions(skb) || kcov_handle) { - u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); - - if (kcov_handle_ptr) - *kcov_handle_ptr = kcov_handle; - } +#ifdef CONFIG_KCOV + skb->kcov_handle = kcov_handle; +#endif } static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { - u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); - - return kcov_handle ? *kcov_handle : 0; -} +#ifdef CONFIG_KCOV + return skb->kcov_handle; #else -static inline void skb_set_kcov_handle(struct sk_buff *skb, - const u64 kcov_handle) { } -static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } -#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ + return 0; +#endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 53ffabfd4ddb3a24c5603ae82eefb5537ecb5c20 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 9 Nov 2020 18:24:03 -0800 Subject: block: move blk_rq_bio_prep() to linux/blk-mq.h This is a preparation patch to have minimal block layer request bio append functionality in the context of the NVMeOF Passthru driver which falls in the fast path and doesn't need calls from blk_rq_append_bio(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- include/linux/blk-mq.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c3..e7482e6ad3ec 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -593,6 +593,18 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } +static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, + unsigned int nr_segs) +{ + rq->nr_phys_segments = nr_segs; + rq->__data_len = bio->bi_iter.bi_size; + rq->bio = rq->biotail = bio; + rq->ioprio = bio_prio(bio); + + if (bio->bi_disk) + rq->rq_disk = bio->bi_disk; +} + blk_qc_t blk_mq_submit_bio(struct bio *bio); #endif -- cgit v1.2.3 From b6f8ed33ab2bbc58e40fb1e2fb0f9c90cab04baf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Oct 2020 15:20:41 +0200 Subject: pstore/blk: remove {un,}register_pstore_blk This interface is entirely unused, so remove them and various bits of unreachable code. Signed-off-by: Christoph Hellwig Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201016132047.3068029-4-hch@lst.de --- include/linux/pstore_blk.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index 61e914522b01..99564f93d774 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -7,48 +7,6 @@ #include #include -/** - * typedef pstore_blk_panic_write_op - panic write operation to block device - * - * @buf: the data to write - * @start_sect: start sector to block device - * @sects: sectors count on buf - * - * Return: On success, zero should be returned. Others excluding -ENOMSG - * mean error. -ENOMSG means to try next zone. - * - * Panic write to block device must be aligned to SECTOR_SIZE. - */ -typedef int (*pstore_blk_panic_write_op)(const char *buf, sector_t start_sect, - sector_t sects); - -/** - * struct pstore_blk_info - pstore/blk registration details - * - * @major: Which major device number to support with pstore/blk - * @flags: The supported PSTORE_FLAGS_* from linux/pstore.h. - * @panic_write:The write operation only used for the panic case. - * This can be NULL, but is recommended to avoid losing - * crash data if the kernel's IO path or work queues are - * broken during a panic. - * @devt: The dev_t that pstore/blk has attached to. - * @nr_sects: Number of sectors on @devt. - * @start_sect: Starting sector on @devt. - */ -struct pstore_blk_info { - unsigned int major; - unsigned int flags; - pstore_blk_panic_write_op panic_write; - - /* Filled in by pstore/blk after registration. */ - dev_t devt; - sector_t nr_sects; - sector_t start_sect; -}; - -int register_pstore_blk(struct pstore_blk_info *info); -void unregister_pstore_blk(unsigned int major); - /** * struct pstore_device_info - back-end pstore/blk driver structure. * -- cgit v1.2.3 From 60b498852bf219c0bf2b0864c69972840978ca43 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Nov 2020 15:21:18 +0100 Subject: fs: remove get_super_thawed and get_super_exclusive_thawed Just open code the wait in the only caller of both functions. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e..a61df0dd4f19 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1409,7 +1409,7 @@ enum { struct sb_writers { int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */ + wait_queue_head_t wait_unfrozen; /* wait for thaw */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -3132,8 +3132,6 @@ extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); -extern struct super_block *get_super_thawed(struct block_device *); -extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -- cgit v1.2.3 From 040f04bd2e825f1d80b14a0e0ac3d830339eb779 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 11:54:06 +0100 Subject: fs: simplify freeze_bdev/thaw_bdev Store the frozen superblock in struct block_device to avoid the awkward interface that can return a sb only used a cookie, an ERR_PTR or NULL. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d9b69bbde5cc..ebfb4e7c1fd1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -46,6 +46,7 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; + struct super_block *bd_fsfreeze_sb; } __randomize_layout; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 05b346a68c2e..12810a19edeb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2020,7 +2020,7 @@ static inline int sync_blockdev(struct block_device *bdev) #endif int fsync_bdev(struct block_device *bdev); -struct super_block *freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev, struct super_block *sb); +int freeze_bdev(struct block_device *bdev); +int thaw_bdev(struct block_device *bdev); #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From b601d148a16ea16dfbaf3600be35ee175847a09b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Nov 2020 16:21:52 +0100 Subject: block: remove a duplicate __disk_get_part prototype Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Reviewed-by: Jan Kara Reviewed-by: Greg Kroah-Hartman Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 46553d6d6025..22f5b9fd96f8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -250,7 +250,6 @@ static inline dev_t part_devt(struct hd_struct *part) return part_to_dev(part)->devt; } -extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) -- cgit v1.2.3 From 8d65269fe8065fee889bca5b204d711b0695a8f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Nov 2020 08:18:55 +0100 Subject: block: add a bdev_kobj helper Add a little helper to find the kobject for a struct block_device. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Tejun Heo Acked-by: Coly Li [bcache] Acked-by: David Sterba [btrfs] Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ebfb4e7c1fd1..9698f459cc65 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,6 +49,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; } __randomize_layout; +#define bdev_kobj(_bdev) \ + (&part_to_dev((_bdev)->bd_part)->kobj) + /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. -- cgit v1.2.3 From c2637e80a09e0d6c698d2771d7230f59c2138122 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 14 Nov 2020 19:19:57 +0100 Subject: init: refactor name_to_dev_t Split each case into a self-contained helper, and move the block dependent code entirely under the pre-existing #ifdef CONFIG_BLOCK. This allows to remove the blk_lookup_devt stub in genhd.h. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 22f5b9fd96f8..ca5e356084c3 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -388,18 +388,13 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +dev_t blk_lookup_devt(const char *name, int partno); #ifdef CONFIG_BLOCK void printk_all_partitions(void); -dev_t blk_lookup_devt(const char *name, int partno); #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int partno) -{ - dev_t devt = MKDEV(0, 0); - return devt; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ -- cgit v1.2.3 From 4e7b5671c6a883d94b5428e1a9c141bbd56cb2a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 13:38:40 +0100 Subject: block: remove i_bdev Switch the block device lookup interfaces to directly work with a dev_t so that struct block_device references are only acquired by the blkdev_get variants (and the blk-cgroup special case). This means that we now don't need an extra reference in the inode and can generally simplify handling of struct block_device to keep the lookups contained in the core block layer code. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Acked-by: Coly Li [bcache] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12810a19edeb..bdd7339bcda4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1973,7 +1973,7 @@ int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); const char *bdevname(struct block_device *bdev, char *buffer); -struct block_device *lookup_bdev(const char *); +int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); diff --git a/include/linux/fs.h b/include/linux/fs.h index a61df0dd4f19..b0b358309657 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -696,7 +696,6 @@ struct inode { struct list_head i_devices; union { struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; -- cgit v1.2.3 From 22ae8ce8b89241c94ac00c237752c0ffa37ba5ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 09:23:26 +0100 Subject: block: simplify bdev/disk lookup in blkdev_get To simplify block device lookup and a few other upcoming areas, make sure that we always have a struct block_device available for each disk and each partition, and only find existing block devices in bdget. The only downside of this is that each device and partition uses a little more memory. The upside will be that a lot of code can be simplified. With that all we need to look up the block device is to lookup the inode and do a few sanity checks on the gendisk, instead of the separate lookup for the gendisk. For blk-cgroup which wants to access a gendisk without opening it, a new blkdev_{get,put}_no_open low-level interface is added to replace the previous get_gendisk use. Note that the change to look up block device directly instead of the two step lookup using struct gendisk causes a subtile change in behavior: accessing a non-existing partition on an existing block device can now cause a call to request_module. That call is harmless, and in practice no recent system will access these nodes as they aren't created by udev and static /dev/ setups are unusual. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 4 ++-- include/linux/blkdev.h | 6 ++++++ include/linux/genhd.h | 7 ++++--- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c8fc9792ac77..b9f3c246c3c9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); struct blkg_conf_ctx { - struct gendisk *disk; + struct block_device *bdev; struct blkcg_gq *blkg; char *body; }; -struct gendisk *blkcg_conf_get_disk(char **inputp); +struct block_device *blkcg_conf_open_bdev(char **inputp); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdd7339bcda4..5d48b92f5e43 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1994,6 +1994,12 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); +/* just for blk-cgroup, don't use elsewhere */ +struct block_device *blkdev_get_no_open(dev_t dev); +void blkdev_put_no_open(struct block_device *bdev); + +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); +void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ca5e356084c3..42a51653c730 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,6 +65,7 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; + struct block_device *bdev; struct device __dev; struct kobject *holder_dir; int policy, partno; @@ -193,7 +194,6 @@ struct gendisk { int flags; unsigned long state; #define GD_NEED_PART_SCAN 0 - struct rw_semaphore lookup_sem; struct kobject *slave_dir; struct timer_rand_state *random; @@ -300,7 +300,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk) } extern void del_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_disk_ro(struct gendisk *disk, int flag); @@ -338,7 +337,6 @@ int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); extern void put_disk(struct gendisk *disk); -extern void put_disk_and_module(struct gendisk *disk); #define alloc_disk_node(minors, node_id) \ ({ \ @@ -388,7 +386,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +extern struct rw_semaphore bdev_lookup_sem; + dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK void printk_all_partitions(void); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From a954ea812018a84d350b316c39a2be3edc4b7ca8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 13:29:55 +0100 Subject: block: remove ->bd_contains Now that each hd_struct has a reference to the corresponding block_device, there is no need for the bd_contains pointer. Add a bdev_whole() helper to look up the whole device block_device struture instead. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9698f459cc65..2e0a9bd9688d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -32,7 +32,6 @@ struct block_device { #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif - struct block_device * bd_contains; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ @@ -49,6 +48,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; } __randomize_layout; +#define bdev_whole(_bdev) \ + ((_bdev)->bd_disk->part0.bdev) + #define bdev_kobj(_bdev) \ (&part_to_dev((_bdev)->bd_part)->kobj) -- cgit v1.2.3 From 37c3fc9abb25cd767ad5b048358336ac89488c16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Nov 2020 21:20:08 +0100 Subject: block: simplify the block device claiming interface Stop passing the whole device as a separate argument given that it can be trivially deducted and cleanup the !holder debug check. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5d48b92f5e43..43a25d855e04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1988,10 +1988,8 @@ void blkdev_show(struct seq_file *seqf, off_t offset); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder); -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder); +int bd_prepare_to_claim(struct block_device *bdev, void *holder); +void bd_abort_claiming(struct block_device *bdev, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); /* just for blk-cgroup, don't use elsewhere */ -- cgit v1.2.3 From c64dc3bd87097e7f08b9437819440f8bfddef995 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:38:15 +0100 Subject: block: simplify part_to_disk Now that struct hd_struct has a block_device pointer use that to find the disk. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/genhd.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 42a51653c730..6ba91ee54cb2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -218,13 +218,9 @@ struct gendisk { static inline struct gendisk *part_to_disk(struct hd_struct *part) { - if (likely(part)) { - if (part->partno) - return dev_to_disk(part_to_dev(part)->parent); - else - return dev_to_disk(part_to_dev(part)); - } - return NULL; + if (unlikely(!part)) + return NULL; + return part->bdev->bd_disk; } static inline int disk_max_parts(struct gendisk *disk) -- cgit v1.2.3 From a782483cc1f875355690625d8253a232f2581418 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:43:37 +0100 Subject: block: remove the nr_sects field in struct hd_struct Now that the hd_struct always has a block device attached to it, there is no need for having two size field that just get out of sync. Additionally the field in hd_struct did not use proper serialization, possibly allowing for torn writes. By only using the block_device field this problem also gets fixed. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Coly Li [bcache] Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- include/linux/genhd.h | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6ba91ee54cb2..30d4785b7df8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -52,15 +52,6 @@ struct partition_meta_info { struct hd_struct { sector_t start_sect; - /* - * nr_sects is protected by sequence counter. One might extend a - * partition while IO is happening to it and update of nr_sects - * can be non-atomic on 32bit machines with 64bit sector_t. - */ - sector_t nr_sects; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_t nr_sects_seq; -#endif unsigned long stamp; struct disk_stats __percpu *dkstats; struct percpu_ref ref; @@ -254,13 +245,6 @@ static inline void disk_put_part(struct hd_struct *part) put_device(part_to_dev(part)); } -static inline void hd_sects_seq_init(struct hd_struct *p) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_init(&p->nr_sects_seq); -#endif -} - /* * Smarter partition iterator without context limits. */ @@ -318,13 +302,15 @@ static inline sector_t get_start_sect(struct block_device *bdev) { return bdev->bd_part->start_sect; } -static inline sector_t get_capacity(struct gendisk *disk) + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) { - return disk->part0.nr_sects; + return i_size_read(bdev->bd_inode) >> 9; } -static inline void set_capacity(struct gendisk *disk, sector_t size) + +static inline sector_t get_capacity(struct gendisk *disk) { - disk->part0.nr_sects = size; + return bdev_nr_sectors(disk->part0.bdev); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); @@ -358,10 +344,9 @@ int __register_blkdev(unsigned int major, const char *name, __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); -void revalidate_disk_size(struct gendisk *disk, bool verbose); bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); +void set_capacity(struct gendisk *disk, sector_t size); /* for drivers/char/raw.c: */ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -- cgit v1.2.3 From 15e3d2c5cd53298272e59ad9072d3468f9dd3781 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:34:00 +0100 Subject: block: move disk stat accounting to struct block_device Move the dkstats and stamp field to struct block_device in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/genhd.h | 2 -- include/linux/part_stat.h | 38 +++++++++++++++++++------------------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2e0a9bd9688d..520011b95276 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,6 +20,8 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + struct disk_stats __percpu *bd_stats; + unsigned long bd_stamp; dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 30d4785b7df8..804ac45fbfbc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -52,8 +52,6 @@ struct partition_meta_info { struct hd_struct { sector_t start_sect; - unsigned long stamp; - struct disk_stats __percpu *dkstats; struct percpu_ref ref; struct block_device *bdev; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 24125778ef3e..87ad60106e1d 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,17 +25,17 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) + (per_cpu_ptr((part)->bdev->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->dkstats->field) res = 0; \ + typeof((part)->bdev->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res += per_cpu_ptr((part)->bdev->bd_stats, _cpu)->field; \ res; \ }) @@ -44,7 +44,7 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, + memset(per_cpu_ptr(part->bdev->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,7 +54,7 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->dkstats->field, addnd) + __this_cpu_add((part)->bdev->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ @@ -63,20 +63,20 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) field, addnd); \ } while (0) -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) +#define part_stat_dec(part, field) \ + part_stat_add(part, field, -1) +#define part_stat_inc(part, field) \ + part_stat_add(part, field, 1) +#define part_stat_sub(part, field, subnd) \ + part_stat_add(part, field, -subnd) -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) +#define part_stat_local_dec(part, field) \ + local_dec(&(part_stat_get(part, field))) +#define part_stat_local_inc(part, field) \ + local_inc(&(part_stat_get(part, field))) +#define part_stat_local_read(part, field) \ + local_read(&(part_stat_get(part, field))) +#define part_stat_local_read_cpu(part, field, cpu) \ + local_read(&(part_stat_get_cpu(part, field, cpu))) #endif /* _LINUX_PART_STAT_H */ -- cgit v1.2.3 From 29ff57c61094e7bbd921ab10b5a99dce9a0132e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:34:24 +0100 Subject: block: move the start_sect field to struct block_device Move the start_sect field to struct block_device in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 4 ++-- include/linux/genhd.h | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 520011b95276..a690008f60cd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,6 +20,7 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + sector_t bd_start_sect; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; dev_t bd_dev; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 43a25d855e04..619adea57098 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1488,7 +1488,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) return -1; if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.alignment_offset; } @@ -1529,7 +1529,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.discard_alignment; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 804ac45fbfbc..50d27f5d38e2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -51,7 +51,6 @@ struct partition_meta_info { }; struct hd_struct { - sector_t start_sect; struct percpu_ref ref; struct block_device *bdev; @@ -298,7 +297,7 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_part->start_sect; + return bdev->bd_start_sect; } static inline sector_t bdev_nr_sectors(struct block_device *bdev) -- cgit v1.2.3 From 231926dbf0f084211e4ec4f4c006f0bf1f47809a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 12:01:45 +0100 Subject: block: move the partition_meta_info to struct block_device Move the partition_meta_info to struct block_device in preparation for killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/genhd.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a690008f60cd..2f8ede04e5a9 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,6 +49,8 @@ struct block_device { /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; struct super_block *bd_fsfreeze_sb; + + struct partition_meta_info *bd_meta_info; } __randomize_layout; #define bdev_whole(_bdev) \ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 50d27f5d38e2..30d7076155b4 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -57,7 +57,6 @@ struct hd_struct { struct device __dev; struct kobject *holder_dir; int policy, partno; - struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif -- cgit v1.2.3 From 1bdd5ae0251d678488dffcf455d4633c2beef1bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 19:00:13 +0100 Subject: block: move holder_dir to struct block_device Move the holder_dir field to struct block_device in preparation for kill struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/genhd.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 2f8ede04e5a9..c0591e52d7d7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -35,6 +35,7 @@ struct block_device { #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif + struct kobject *bd_holder_dir; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 30d7076155b4..b4a5c05593b9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -55,7 +55,6 @@ struct hd_struct { struct block_device *bdev; struct device __dev; - struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; -- cgit v1.2.3 From b309e9936347232c724eaa13f70533128b4864e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:28:47 +0100 Subject: block: move make_it_fail to struct block_device Move the make_it_fail flag to struct block_device an turn it into a bool in preparation of killing struct hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +++ include/linux/genhd.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c0591e52d7d7..b237f1e40814 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -52,6 +52,9 @@ struct block_device { struct super_block *bd_fsfreeze_sb; struct partition_meta_info *bd_meta_info; +#ifdef CONFIG_FAIL_MAKE_REQUEST + bool bd_make_it_fail; +#endif } __randomize_layout; #define bdev_whole(_bdev) \ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b4a5c05593b9..349cf6403ccd 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -56,9 +56,6 @@ struct hd_struct { struct block_device *bdev; struct device __dev; int policy, partno; -#ifdef CONFIG_FAIL_MAKE_REQUEST - int make_it_fail; -#endif struct rcu_work rcu_work; }; -- cgit v1.2.3 From 83950d359010a493462d58c712b1124c877d1b3b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Nov 2020 16:36:02 +0100 Subject: block: move the policy field to struct block_device Move the policy field to struct block_device and rename it to the more descriptive bd_read_only. Also turn the field into a bool as it is used as such. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/genhd.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b237f1e40814..758cf71c9aa2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -23,6 +23,7 @@ struct block_device { sector_t bd_start_sect; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; + bool bd_read_only; /* read-only policy */ dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 349cf6403ccd..dcbf9ef7610e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -55,7 +55,7 @@ struct hd_struct { struct block_device *bdev; struct device __dev; - int policy, partno; + int partno; struct rcu_work rcu_work; }; @@ -278,7 +278,7 @@ extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.policy; + return disk->part0.bdev->bd_read_only; } extern void disk_block_events(struct gendisk *disk); -- cgit v1.2.3 From cb8432d650fe3be58bb962bc8e602dc405510327 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:47:17 +0100 Subject: block: allocate struct hd_struct as part of struct bdev_inode Allocate hd_struct together with struct block_device to pre-load the lifetime rule changes in preparation of merging the two structures. Note that part0 was previously embedded into struct gendisk, but is a separate allocation now, and already points to the block_device instead of the hd_struct. The lifetime of struct gendisk is still controlled by the struct device embedded in the part0 hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- include/linux/genhd.h | 14 ++++++-------- include/linux/part_stat.h | 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 758cf71c9aa2..6edea5c16259 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -59,7 +59,7 @@ struct block_device { } __randomize_layout; #define bdev_whole(_bdev) \ - ((_bdev)->bd_disk->part0.bdev) + ((_bdev)->bd_disk->part0) #define bdev_kobj(_bdev) \ (&part_to_dev((_bdev)->bd_part)->kobj) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index dcbf9ef7610e..df7319da013c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,11 +19,12 @@ #include #include -#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) #define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define disk_to_dev(disk) (&(disk)->part0.__dev) #define part_to_dev(part) (&((part)->__dev)) +#define dev_to_disk(device) (dev_to_part(device)->bdev->bd_disk) +#define disk_to_dev(disk) (part_to_dev((disk)->part0->bd_part)) + extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -51,12 +52,9 @@ struct partition_meta_info { }; struct hd_struct { - struct percpu_ref ref; - struct block_device *bdev; struct device __dev; int partno; - struct rcu_work rcu_work; }; /** @@ -168,7 +166,7 @@ struct gendisk { * helpers. */ struct disk_part_tbl __rcu *part_tbl; - struct hd_struct part0; + struct block_device *part0; const struct block_device_operations *fops; struct request_queue *queue; @@ -278,7 +276,7 @@ extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.bdev->bd_read_only; + return disk->part0->bd_read_only; } extern void disk_block_events(struct gendisk *disk); @@ -302,7 +300,7 @@ static inline sector_t bdev_nr_sectors(struct block_device *bdev) static inline sector_t get_capacity(struct gendisk *disk) { - return bdev_nr_sectors(disk->part0.bdev); + return bdev_nr_sectors(disk->part0); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 87ad60106e1d..680de036691e 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -59,8 +59,8 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ + __part_stat_add(part_to_disk((part))->part0->bd_part, \ + field, addnd); \ } while (0) #define part_stat_dec(part, field) \ -- cgit v1.2.3 From 8446fe9255be821cb38ffd306d7e8edc4b9ea662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:36:54 +0100 Subject: block: switch partition lookup to use struct block_device Use struct block_device to lookup partitions on a disk. This removes all usage of struct hd_struct from the I/O path. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Acked-by: Coly Li [bcache] Acked-by: Chao Yu [f2fs] Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- include/linux/genhd.h | 4 ++-- include/linux/part_stat.h | 17 ++++++++--------- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 619adea57098..1d4be1fc6007 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -191,7 +191,7 @@ struct request { }; struct gendisk *rq_disk; - struct hd_struct *part; + struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ u64 alloc_time_ns; @@ -1943,9 +1943,9 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, - struct bio *bio); -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +unsigned long part_start_io_acct(struct gendisk *disk, + struct block_device **part, struct bio *bio); +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time); /** diff --git a/include/linux/genhd.h b/include/linux/genhd.h index df7319da013c..fe6fee77e2b9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -131,8 +131,8 @@ enum { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct __rcu *last_lookup; - struct hd_struct __rcu *part[]; + struct block_device __rcu *last_lookup; + struct block_device __rcu *part[]; }; struct disk_events; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 680de036691e..d2558121d48c 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,26 +25,26 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->bdev->bd_stats, (cpu))->field) + (per_cpu_ptr((part)->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->bdev->bd_stats->field) res = 0; \ + typeof((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->bdev->bd_stats, _cpu)->field; \ + res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ res; \ }) -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void part_stat_set_all(struct block_device *part, int value) { int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->bdev->bd_stats, i), value, + memset(per_cpu_ptr(part->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,13 +54,12 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->bdev->bd_stats->field, addnd) + __this_cpu_add((part)->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(part_to_disk((part))->part0->bd_part, \ - field, addnd); \ + if ((part)->bd_partno) \ + __part_stat_add(bdev_whole(part), field, addnd); \ } while (0) #define part_stat_dec(part, field) \ -- cgit v1.2.3 From 41e5c81984eac8ce87f2b4f57fec0bd90a049b2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:37:14 +0100 Subject: block: remove the partno field from struct hd_struct Just use the bd_partno field in struct block_device everywhere. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fe6fee77e2b9..3c13d4708e3f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -54,7 +54,6 @@ struct partition_meta_info { struct hd_struct { struct block_device *bdev; struct device __dev; - int partno; }; /** -- cgit v1.2.3 From ad1eaa5344b293552b6ba43f5709c76a9aa14d17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Nov 2020 09:52:59 +0100 Subject: block: switch disk_part_iter_* to use a struct block_device Switch the partition iter infrastructure to iterate over block_device references instead of hd_struct ones mostly used to get at the block_device. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3c13d4708e3f..cd23c80265b2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -244,14 +244,14 @@ static inline void disk_put_part(struct hd_struct *part) struct disk_part_iter { struct gendisk *disk; - struct hd_struct *part; + struct block_device *part; int idx; unsigned int flags; }; extern void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, unsigned int flags); -extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +struct block_device *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); extern bool disk_has_partitions(struct gendisk *disk); -- cgit v1.2.3 From 0d02129e76edf91cf04fabf1efbc3a9a1f1d729a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Nov 2020 16:43:51 +0100 Subject: block: merge struct block_device and struct hd_struct Instead of having two structures that represent each block device with different life time rules, merge them into a single one. This also greatly simplifies the reference counting rules, as we can use the inode reference count as the main reference count for the new struct block_device, with the device model reference front ending it for device model interaction. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++++++-- include/linux/blkdev.h | 1 - include/linux/genhd.h | 40 +++++++++------------------------------- 3 files changed, 15 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6edea5c16259..866f74261b3b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -8,6 +8,7 @@ #include #include +#include #include struct bio_set; @@ -30,6 +31,7 @@ struct block_device { struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ void * bd_claiming; + struct device bd_device; void * bd_holder; int bd_holders; bool bd_write_holder; @@ -38,7 +40,6 @@ struct block_device { #endif struct kobject *bd_holder_dir; u8 bd_partno; - struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; @@ -61,8 +62,11 @@ struct block_device { #define bdev_whole(_bdev) \ ((_bdev)->bd_disk->part0) +#define dev_to_bdev(device) \ + container_of((device), struct block_device, bd_device) + #define bdev_kobj(_bdev) \ - (&part_to_dev((_bdev)->bd_part)->kobj) + (&((_bdev)->bd_device.kobj)) /* * Block error status values. See block/blk-core:blk_errors for the details. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1d4be1fc6007..17cedf0dc83d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1999,7 +1999,6 @@ void blkdev_put_no_open(struct block_device *bdev); struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index cd23c80265b2..809aaa32d53c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,12 +19,6 @@ #include #include -#define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define part_to_dev(part) (&((part)->__dev)) - -#define dev_to_disk(device) (dev_to_part(device)->bdev->bd_disk) -#define disk_to_dev(disk) (part_to_dev((disk)->part0->bd_part)) - extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -51,11 +45,6 @@ struct partition_meta_info { u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; -struct hd_struct { - struct block_device *bdev; - struct device __dev; -}; - /** * DOC: genhd capability flags * @@ -190,19 +179,21 @@ struct gendisk { struct lockdep_map lockdep_map; }; +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + #if IS_REACHABLE(CONFIG_CDROM) #define disk_to_cdi(disk) ((disk)->cdi) #else #define disk_to_cdi(disk) NULL #endif -static inline struct gendisk *part_to_disk(struct hd_struct *part) -{ - if (unlikely(!part)) - return NULL; - return part->bdev->bd_disk; -} - static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -221,19 +212,6 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } -static inline dev_t part_devt(struct hd_struct *part) -{ - return part_to_dev(part)->devt; -} - -extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); - -static inline void disk_put_part(struct hd_struct *part) -{ - if (likely(part)) - put_device(part_to_dev(part)); -} - /* * Smarter partition iterator without context limits. */ -- cgit v1.2.3 From c214550ff8eae9623605149fa4e3da3ba43df145 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 29 Nov 2020 22:05:50 +0200 Subject: net: delete __dev_getfirstbyhwtype The last user of the RTNL brother of dev_getfirstbyhwtype (the latter being synchronized under RCU) has been deleted in commit b4db2b35fc44 ("afs: Use core kernel UUID generation"). Cc: Arnd Bergmann Cc: David Howells Cc: Eric Dumazet Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201129200550.2433401-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8eeb73ac58bd..2e077e289e75 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2815,7 +2815,6 @@ unsigned long netdev_boot_base(const char *prefix, int unit); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); -struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); void dev_add_pack(struct packet_type *pt); void dev_remove_pack(struct packet_type *pt); void __dev_remove_pack(struct packet_type *pt); -- cgit v1.2.3 From 1967939462641d8b36bcb3fcf06d48e66cd67a4f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Nov 2020 04:33:35 +0900 Subject: Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK Revert commit cebc04ba9aeb ("add CONFIG_ENABLE_MUST_CHECK"). A lot of warn_unused_result warnings existed in 2006, but until now they have been fixed thanks to people doing allmodconfig tests. Our goal is to always enable __must_check where appropriate, so this CONFIG option is no longer needed. I see a lot of defconfig (arch/*/configs/*_defconfig) files having: # CONFIG_ENABLE_MUST_CHECK is not set I did not touch them for now since it would be a big churn. If arch maintainers want to clean them up, please go ahead. While I was here, I also moved __must_check to compiler_attributes.h from compiler_types.h Signed-off-by: Masahiro Yamada Acked-by: Jason A. Donenfeld Acked-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [Moved addition in compiler_attributes.h to keep it sorted] Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 6 ++++++ include/linux/compiler_types.h | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index b2a3f4f641a7..ea5e04e75845 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -272,6 +272,12 @@ */ #define __used __attribute__((__used__)) +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warn_005funused_005fresult-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result + */ +#define __must_check __attribute__((__warn_unused_result__)) + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index ac3fa37a84f9..7ef20d1a6c28 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -110,12 +110,6 @@ struct ftrace_likely_data { unsigned long constant; }; -#ifdef CONFIG_ENABLE_MUST_CHECK -#define __must_check __attribute__((__warn_unused_result__)) -#else -#define __must_check -#endif - #if defined(CC_USING_HOTPATCH) #define notrace __attribute__((hotpatch(0, 0))) #elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY) -- cgit v1.2.3 From 1446e1df9eb183fdf81c3f0715402f1d7595d4cb Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 27 Nov 2020 14:32:34 -0500 Subject: kernel: Implement selective syscall userspace redirection Introduce a mechanism to quickly disable/enable syscall handling for a specific process and redirect to userspace via SIGSYS. This is useful for processes with parts that require syscall redirection and parts that don't, but who need to perform this boundary crossing really fast, without paying the cost of a system call to reconfigure syscall handling on each boundary transition. This is particularly important for Windows games running over Wine. The proposed interface looks like this: prctl(PR_SET_SYSCALL_USER_DISPATCH, , , , [selector]) The range [,+) is a part of the process memory map that is allowed to by-pass the redirection code and dispatch syscalls directly, such that in fast paths a process doesn't need to disable the trap nor the kernel has to check the selector. This is essential to return from SIGSYS to a blocked area without triggering another SIGSYS from rt_sigreturn. selector is an optional pointer to a char-sized userspace memory region that has a key switch for the mechanism. This key switch is set to either PR_SYS_DISPATCH_ON, PR_SYS_DISPATCH_OFF to enable and disable the redirection without calling the kernel. The feature is meant to be set per-thread and it is disabled on fork/clone/execv. Internally, this doesn't add overhead to the syscall hot path, and it requires very little per-architecture support. I avoided using seccomp, even though it duplicates some functionality, due to previous feedback that maybe it shouldn't mix with seccomp since it is not a security mechanism. And obviously, this should never be considered a security mechanism, since any part of the program can by-pass it by using the syscall dispatcher. For the sysinfo benchmark, which measures the overhead added to executing a native syscall that doesn't require interception, the overhead using only the direct dispatcher region to issue syscalls is pretty much irrelevant. The overhead of using the selector goes around 40ns for a native (unredirected) syscall in my system, and it is (as expected) dominated by the supervisor-mode user-address access. In fact, with SMAP off, the overhead is consistently less than 5ns on my test box. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Link: https://lore.kernel.org/r/20201127193238.821364-4-krisman@collabora.com --- include/linux/sched.h | 2 ++ include/linux/syscall_user_dispatch.h | 40 +++++++++++++++++++++++++++++++++++ include/linux/thread_info.h | 2 ++ 3 files changed, 44 insertions(+) create mode 100644 include/linux/syscall_user_dispatch.h (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 063cd120b459..5a24a033b3f8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -965,6 +966,7 @@ struct task_struct { unsigned int sessionid; #endif struct seccomp seccomp; + struct syscall_user_dispatch syscall_dispatch; /* Thread group tracking: */ u64 parent_exec_id; diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h new file mode 100644 index 000000000000..a0ae443fb7df --- /dev/null +++ b/include/linux/syscall_user_dispatch.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Collabora Ltd. + */ +#ifndef _SYSCALL_USER_DISPATCH_H +#define _SYSCALL_USER_DISPATCH_H + +#include + +#ifdef CONFIG_GENERIC_ENTRY + +struct syscall_user_dispatch { + char __user *selector; + unsigned long offset; + unsigned long len; + bool on_dispatch; +}; + +int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, + unsigned long len, char __user *selector); + +#define clear_syscall_work_syscall_user_dispatch(tsk) \ + clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH) + +#else +struct syscall_user_dispatch {}; + +static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, + unsigned long len, char __user *selector) +{ + return -EINVAL; +} + +static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *tsk) +{ +} + +#endif /* CONFIG_GENERIC_ENTRY */ + +#endif /* _SYSCALL_USER_DISPATCH_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index ca80a214df09..c8a974cead73 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,6 +42,7 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SYSCALL_TRACE, SYSCALL_WORK_BIT_SYSCALL_EMU, SYSCALL_WORK_BIT_SYSCALL_AUDIT, + SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) @@ -49,6 +50,7 @@ enum syscall_work_bit { #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) +#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) #endif #include -- cgit v1.2.3 From 11894468e39def270199f845b76df6c36d4ed133 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 27 Nov 2020 14:32:35 -0500 Subject: entry: Support Syscall User Dispatch on common syscall entry Syscall User Dispatch (SUD) must take precedence over seccomp and ptrace, since the use case is emulation (it can be invoked with a different ABI) such that seccomp filtering by syscall number doesn't make sense in the first place. In addition, either the syscall is dispatched back to userspace, in which case there is no resource for to trace, or the syscall will be executed, and seccomp/ptrace will execute next. Since SUD runs before tracepoints, it needs to be a SYSCALL_WORK_EXIT as well, just to prevent a trace exit event when dispatch was triggered. For that, the on_syscall_dispatch() examines context to skip the tracepoint, audit and other work. [ tglx: Add a comment on the exit side ] Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Link: https://lore.kernel.org/r/20201127193238.821364-5-krisman@collabora.com --- include/linux/entry-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 49b26b216e4e..a6e98b4ba8e9 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -44,10 +44,12 @@ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_EMU | \ SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ ARCH_SYSCALL_WORK_ENTER) #define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ ARCH_SYSCALL_WORK_EXIT) /* -- cgit v1.2.3 From 96e2fbccd0fc806364a964fdf072bfc858a66109 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 1 Dec 2020 15:27:53 +0100 Subject: entry_Add_enter_from_user_mode_wrapper To be called from architecture specific code if the combo interfaces are not suitable. It simply calls __enter_from_user_mode(). This way __enter_from_user_mode will still be inlined because it is declared static __always_inline. [ tglx: Amend comments and move it to a different location in the header ] Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201201142755.31931-4-svens@linux.ibm.com --- include/linux/entry-common.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index a6e98b4ba8e9..da60980a2e7b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -101,6 +101,27 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs } #endif +/** + * enter_from_user_mode - Establish state when coming from user mode + * + * Syscall/interrupt entry disables interrupts, but user mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct and interrupts are still + * disabled. The subsequent functions can be instrumented. + * + * This is invoked when there is architecture specific functionality to be + * done between establishing state and enabling interrupts. The caller must + * enable interrupts before invoking syscall_enter_from_user_mode_work(). + */ +void enter_from_user_mode(struct pt_regs *regs); + /** * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs @@ -110,7 +131,8 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * function returns all state is correct, interrupts are enabled and the * subsequent functions can be instrumented. * - * This handles lockdep, RCU (context tracking) and tracing state. + * This handles lockdep, RCU (context tracking) and tracing state, i.e. + * the functionality provided by enter_from_user_mode(). * * This is invoked when there is extra architecture specific functionality * to be done between establishing state and handling user mode entry work. -- cgit v1.2.3 From 310de1a678b2184c078c593dae343cb79c807f8d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 1 Dec 2020 15:27:54 +0100 Subject: entry: Add exit_to_user_mode() wrapper Called from architecture specific code when syscall_exit_to_user_mode() is not suitable. It simply calls __exit_to_user_mode(). This way __exit_to_user_mode() can still be inlined because it is declared static __always_inline. [ tglx: Amended comments and moved it to a different place in the header ] Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201201142755.31931-5-svens@linux.ibm.com --- include/linux/entry-common.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index da60980a2e7b..e370be8121aa 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -300,6 +300,25 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) } #endif +/** + * exit_to_user_mode - Fixup state when exiting to user mode + * + * Syscall/interrupt exit enables interrupts, but the kernel state is + * interrupts disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Invoke architecture specific last minute exit code, e.g. speculation + * mitigations, etc.: arch_exit_to_user_mode() + * 4) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code when syscall_exit_to_user_mode() + * is not suitable as the last step before returning to userspace. Must be + * invoked with interrupts disabled and the caller must be + * non-instrumentable. + */ +void exit_to_user_mode(void); + /** * syscall_exit_to_user_mode - Handle work before returning to user mode * @regs: Pointer to currents pt_regs @@ -322,8 +341,8 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) * - Architecture specific one time work arch_exit_to_user_mode_prepare() * - Address limit and lockdep checks * - * 3) Final transition (lockdep, tracing, context tracking, RCU). Invokes - * arch_exit_to_user_mode() to handle e.g. speculation mitigations + * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the + * functionality in exit_to_user_mode(). */ void syscall_exit_to_user_mode(struct pt_regs *regs); -- cgit v1.2.3 From c6156e1da633f241e132eaea3b676d674376d770 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 1 Dec 2020 15:27:55 +0100 Subject: entry: Add syscall_exit_to_user_mode_work() This is the same as syscall_exit_to_user_mode() but without calling exit_to_user_mode(). This can be used if there is an architectural reason to avoid the combo function, e.g. restarting a syscall without returning to userspace. Before returning to user space the caller has to invoke exit_to_user_mode(). [ tglx: Amended comments ] Signed-off-by: Sven Schnelle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201201142755.31931-6-svens@linux.ibm.com --- include/linux/entry-common.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index e370be8121aa..7c581a4c3797 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -316,9 +316,25 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) * is not suitable as the last step before returning to userspace. Must be * invoked with interrupts disabled and the caller must be * non-instrumentable. + * The caller has to invoke syscall_exit_to_user_mode_work() before this. */ void exit_to_user_mode(void); +/** + * syscall_exit_to_user_mode_work - Handle work before returning to user mode + * @regs: Pointer to currents pt_regs + * + * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling + * exit_to_user_mode() to perform the final transition to user mode. + * + * Calling convention is the same as for syscall_exit_to_user_mode() and it + * returns with all work handled and interrupts disabled. The caller must + * invoke exit_to_user_mode() before actually switching to user mode to + * make the final state transitions. Interrupts must stay disabled between + * return from this function and the invocation of exit_to_user_mode(). + */ +void syscall_exit_to_user_mode_work(struct pt_regs *regs); + /** * syscall_exit_to_user_mode - Handle work before returning to user mode * @regs: Pointer to currents pt_regs @@ -343,6 +359,10 @@ void exit_to_user_mode(void); * * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the * functionality in exit_to_user_mode(). + * + * This is a combination of syscall_exit_to_user_mode_work() (1,2) and + * exit_to_user_mode(). This function is preferred unless there is a + * compelling architectural reason to use the seperate functions. */ void syscall_exit_to_user_mode(struct pt_regs *regs); -- cgit v1.2.3 From 6b6667aa4d1e0866f00b62d35a9be3875c7551f8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 24 Nov 2020 17:58:12 +0000 Subject: block: optimise for_each_bvec() advance Because of how for_each_bvec() works it never advances across multiple entries at a time, so bvec_iter_advance() is an overkill. Add specialised bvec_iter_advance_single() that is faster. It also handles zero-len bvecs, so can kill bvec_iter_skip_zero_bvec(). text data bss dec hex filename before: 23977 805 0 24782 60ce lib/iov_iter.o before, bvec_iter_advance() w/o WARN_ONCE() 22886 600 0 23486 5bbe ./lib/iov_iter.o after: 21862 600 0 22462 57be lib/iov_iter.o Signed-off-by: Pavel Begunkov Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bvec.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 2efec10bf792..ff832e698efb 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -121,18 +121,28 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } -static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) +/* + * A simpler version of bvec_iter_advance(), @bytes should not span + * across multiple bvec entries, i.e. bytes <= bv[i->bi_idx].bv_len + */ +static inline void bvec_iter_advance_single(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned int bytes) { - iter->bi_bvec_done = 0; - iter->bi_idx++; + unsigned int done = iter->bi_bvec_done + bytes; + + if (done == bv[iter->bi_idx].bv_len) { + done = 0; + iter->bi_idx++; + } + iter->bi_bvec_done = done; + iter->bi_size -= bytes; } #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ - (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ -- cgit v1.2.3 From 22b56c2964386ddced252be407150b22f85e209e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 24 Nov 2020 17:58:13 +0000 Subject: bio: optimise bvec iteration __bio_for_each_bvec(), __bio_for_each_segment() and bio_copy_data_iter() fall under conditions of bvec_iter_advance_single(), which is a faster and slimmer version of bvec_iter_advance(). Add bio_advance_iter_single() and convert them. Signed-off-by: Pavel Begunkov Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index ecf67108f091..1edda614f7ce 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -148,11 +148,24 @@ static inline void bio_advance_iter(const struct bio *bio, /* TODO: It is reasonable to complete bio with error here. */ } +/* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ +static inline void bio_advance_iter_single(const struct bio *bio, + struct bvec_iter *iter, + unsigned int bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio_no_advance_iter(bio)) + iter->bi_size -= bytes; + else + bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) @@ -161,7 +174,7 @@ static inline void bio_advance_iter(const struct bio *bio, for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ -- cgit v1.2.3 From 93b8959a0a8cf1b1a493efee9e8328681e111862 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 15:24:41 -0500 Subject: NFS: More readdir cleanups Remove the redundant caching of the credential in struct nfs_open_dir_context. Pass the buffer size as an argument to nfs_readdir_xdr_filler(). Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a2c6455ea3fa..dd6b463dda80 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -88,7 +88,6 @@ struct nfs_open_context { struct nfs_open_dir_context { struct list_head list; - const struct cred *cred; unsigned long attr_gencount; __u64 dir_cookie; __u64 dup_cookie; -- cgit v1.2.3 From 82e22a5e6245873779db1607d3b0fec6f9ca07d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 17:34:23 -0500 Subject: NFS: Allow the NFS generic code to pass in a verifier to readdir If we're ever going to allow support for servers that use the readdir verifier, then that use needs to be managed by the middle layers as those need to be able to reject cookies from other verifiers. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- include/linux/nfs_xdr.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d63cb862d58e..3327239fa2f9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -750,6 +750,20 @@ struct nfs_entry { struct nfs_server * server; }; +struct nfs_readdir_arg { + struct dentry *dentry; + const struct cred *cred; + __be32 *verf; + u64 cookie; + struct page **pages; + unsigned int page_len; + bool plus; +}; + +struct nfs_readdir_res { + __be32 *verf; +}; + /* * The following types are for NFSv2 only. */ @@ -1744,8 +1758,7 @@ struct nfs_rpc_ops { unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); - int (*readdir) (struct dentry *, const struct cred *, - u64, struct page **, unsigned int, bool); + int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *); int (*mknod) (struct inode *, struct dentry *, struct iattr *, dev_t); int (*statfs) (struct nfs_server *, struct nfs_fh *, -- cgit v1.2.3 From b593c09f83a2732a0f0298c8f3468236a83cdd9f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 20:06:12 -0500 Subject: NFS: Improve handling of directory verifiers If the server insists on using the readdir verifiers in order to allow cookies to expire, then we should ensure that we cache the verifier with the cookie, so that we can return an error if the application tries to use the expired cookie. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- include/linux/nfs_fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index dd6b463dda80..681ed98e4ba8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -45,6 +45,11 @@ */ #define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) +/* + * Size of the NFS directory verifier + */ +#define NFS_DIR_VERIFIER_SIZE 2 + /* * NFSv3/v4 Access mode cache entry */ @@ -89,6 +94,7 @@ struct nfs_open_context { struct nfs_open_dir_context { struct list_head list; unsigned long attr_gencount; + __be32 verf[NFS_DIR_VERIFIER_SIZE]; __u64 dir_cookie; __u64 dup_cookie; signed char duped; @@ -156,7 +162,7 @@ struct nfs_inode { * This is the cookie verifier used for NFSv3 readdir * operations */ - __be32 cookieverf[2]; + __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; atomic_long_t nrequests; struct nfs_mds_commit_info commit_info; -- cgit v1.2.3 From d5aa6b22e2258f05317313ecc02efbb988ed6d38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Nov 2020 16:33:38 -0500 Subject: SUNRPC: xprt_load_transport() needs to support the netid "rdma6" According to RFC5666, the correct netid for an IPv6 addressed RDMA transport is "rdma6", which we've supported as a mount option since Linux-4.7. The problem is when we try to load the module "xprtrdma6", that will fail, since there is no modulealias of that name. Fixes: 181342c5ebe8 ("xprtrdma: Add rdma6 option to support NFS/RDMA IPv6") Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a603d48d2b2c..3ac5037d1c3d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -330,6 +330,7 @@ struct xprt_class { struct rpc_xprt * (*setup)(struct xprt_create *); struct module *owner; char name[32]; + const char * netid[]; }; /* -- cgit v1.2.3 From 1fc5f13186440973e1aa1d85aa263326756af431 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Nov 2020 09:41:21 -0500 Subject: SUNRPC: Add a helper to return the transport identifier given a netid Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3ac5037d1c3d..f7b75c72f80e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -386,6 +386,7 @@ xprt_disable_swap(struct rpc_xprt *xprt) int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); int xprt_load_transport(const char *); +int xprt_find_transport_ident(const char *); void xprt_wait_for_reply_request_def(struct rpc_task *task); void xprt_wait_for_reply_request_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); -- cgit v1.2.3 From c87b056e58e71ba7a3f603700618f8da9742aa29 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Nov 2020 10:32:14 -0500 Subject: SUNRPC: Remove unused function xprt_load_transport() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index f7b75c72f80e..d2e97ee802af 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -385,7 +385,6 @@ xprt_disable_swap(struct rpc_xprt *xprt) */ int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); -int xprt_load_transport(const char *); int xprt_find_transport_ident(const char *); void xprt_wait_for_reply_request_def(struct rpc_task *task); void xprt_wait_for_reply_request_rtt(struct rpc_task *task); -- cgit v1.2.3 From 8a6a5920d3286eb0eae9f36a4ec4fc9df511eccb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:30 +0100 Subject: sched/vtime: Consolidate IRQ time accounting The 3 architectures implementing CONFIG_VIRT_CPU_ACCOUNTING_NATIVE all have their own version of irq time accounting that dispatch the cputime to the appropriate index: hardirq, softirq, system, idle, guest... from an all-in-one function. Instead of having these ad-hoc versions, move the cputime destination dispatch decision to the core code and leave only the actual per-index cputime accounting to the architecture. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201202115732.27827-4-frederic@kernel.org --- include/linux/vtime.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 2cdeca062db3..6c9867419615 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -83,16 +83,12 @@ static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void vtime_account_irq_enter(struct task_struct *tsk); -static inline void vtime_account_irq_exit(struct task_struct *tsk) -{ - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_kernel(tsk); -} +extern void vtime_account_irq(struct task_struct *tsk); +extern void vtime_account_softirq(struct task_struct *tsk); +extern void vtime_account_hardirq(struct task_struct *tsk); extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -static inline void vtime_account_irq_enter(struct task_struct *tsk) { } -static inline void vtime_account_irq_exit(struct task_struct *tsk) { } +static inline void vtime_account_irq(struct task_struct *tsk) { } static inline void vtime_flush(struct task_struct *tsk) { } #endif @@ -105,13 +101,13 @@ static inline void irqtime_account_irq(struct task_struct *tsk) { } static inline void account_irq_enter_time(struct task_struct *tsk) { - vtime_account_irq_enter(tsk); + vtime_account_irq(tsk); irqtime_account_irq(tsk); } static inline void account_irq_exit_time(struct task_struct *tsk) { - vtime_account_irq_exit(tsk); + vtime_account_irq(tsk); irqtime_account_irq(tsk); } -- cgit v1.2.3 From d3759e7184f8f6187e62f8c4e7dcb1f6c47c075a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2020 12:57:31 +0100 Subject: irqtime: Move irqtime entry accounting after irq offset incrementation IRQ time entry is currently accounted before HARDIRQ_OFFSET or SOFTIRQ_OFFSET are incremented. This is convenient to decide to which index the cputime to account is dispatched. Unfortunately it prevents tick_irq_enter() from being called under HARDIRQ_OFFSET because tick_irq_enter() has to be called before the IRQ entry accounting due to the necessary clock catch up. As a result we don't benefit from appropriate lockdep coverage on tick_irq_enter(). To prepare for fixing this, move the IRQ entry cputime accounting after the preempt offset is incremented. This requires the cputime dispatch code to handle the extra offset. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20201202115732.27827-5-frederic@kernel.org --- include/linux/hardirq.h | 4 ++-- include/linux/vtime.h | 34 ++++++++++++++++++++++++---------- 2 files changed, 26 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 754f67ac4326..7c9d6a2d7e90 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -32,9 +32,9 @@ static __always_inline void rcu_irq_enter_check_tick(void) */ #define __irq_enter() \ do { \ - account_irq_enter_time(current); \ preempt_count_add(HARDIRQ_OFFSET); \ lockdep_hardirq_enter(); \ + account_hardirq_enter(current); \ } while (0) /* @@ -62,8 +62,8 @@ void irq_enter_rcu(void); */ #define __irq_exit() \ do { \ + account_hardirq_exit(current); \ lockdep_hardirq_exit(); \ - account_irq_exit_time(current); \ preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 6c9867419615..041d6524d144 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -83,32 +83,46 @@ static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void vtime_account_irq(struct task_struct *tsk); +extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset); extern void vtime_account_softirq(struct task_struct *tsk); extern void vtime_account_hardirq(struct task_struct *tsk); extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -static inline void vtime_account_irq(struct task_struct *tsk) { } +static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { } +static inline void vtime_account_softirq(struct task_struct *tsk) { } +static inline void vtime_account_hardirq(struct task_struct *tsk) { } static inline void vtime_flush(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING -extern void irqtime_account_irq(struct task_struct *tsk); +extern void irqtime_account_irq(struct task_struct *tsk, unsigned int offset); #else -static inline void irqtime_account_irq(struct task_struct *tsk) { } +static inline void irqtime_account_irq(struct task_struct *tsk, unsigned int offset) { } #endif -static inline void account_irq_enter_time(struct task_struct *tsk) +static inline void account_softirq_enter(struct task_struct *tsk) { - vtime_account_irq(tsk); - irqtime_account_irq(tsk); + vtime_account_irq(tsk, SOFTIRQ_OFFSET); + irqtime_account_irq(tsk, SOFTIRQ_OFFSET); } -static inline void account_irq_exit_time(struct task_struct *tsk) +static inline void account_softirq_exit(struct task_struct *tsk) { - vtime_account_irq(tsk); - irqtime_account_irq(tsk); + vtime_account_softirq(tsk); + irqtime_account_irq(tsk, 0); +} + +static inline void account_hardirq_enter(struct task_struct *tsk) +{ + vtime_account_irq(tsk, HARDIRQ_OFFSET); + irqtime_account_irq(tsk, HARDIRQ_OFFSET); +} + +static inline void account_hardirq_exit(struct task_struct *tsk) +{ + vtime_account_hardirq(tsk); + irqtime_account_irq(tsk, 0); } #endif /* _LINUX_KERNEL_VTIME_H */ -- cgit v1.2.3 From 427167c0b064ed898b848209add62b4322ec7840 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 2 Dec 2020 09:25:15 -0800 Subject: bpf: Allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks I have to now lock/unlock socket for the bind hook execution. That shouldn't cause any overhead because the socket is unbound and shouldn't receive any traffic. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/20201202172516.3483656-3-sdf@google.com --- include/linux/bpf-cgroup.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ed71bd1a0825..72e69a0e1e8c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -246,11 +246,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, __ret; \ }) -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND) +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND) +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \ sk->sk_prot->pre_connect) @@ -434,8 +434,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -- cgit v1.2.3 From ec0caa974cd092549ab282deb8ec7ea73b36eba0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:37 -0800 Subject: fscrypt: introduce fscrypt_prepare_readdir() The last remaining use of fscrypt_get_encryption_info() from filesystems is for readdir (->iterate_shared()). Every other call is now in fs/crypto/ as part of some other higher-level operation. We need to add a new argument to fscrypt_get_encryption_info() to indicate whether the encryption policy is allowed to be unrecognized or not. Doing this is easier if we can work with high-level operations rather than direct filesystem use of fscrypt_get_encryption_info(). So add a function fscrypt_prepare_readdir() which wraps the call to fscrypt_get_encryption_info() for the readdir use case. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 0c9e64969b73..8cbb26f55695 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -242,6 +242,7 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned int flags); int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); +int __fscrypt_prepare_readdir(struct inode *dir); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); int fscrypt_prepare_symlink(struct inode *dir, const char *target, @@ -537,6 +538,11 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_readdir(struct inode *dir) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) @@ -795,6 +801,24 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, return 0; } +/** + * fscrypt_prepare_readdir() - prepare to read a possibly-encrypted directory + * @dir: the directory inode + * + * If the directory is encrypted and it doesn't already have its encryption key + * set up, try to set it up so that the filenames will be listed in plaintext + * form rather than in no-key form. + * + * Return: 0 on success; -errno on error. Note that the encryption key being + * unavailable is not considered an error. + */ +static inline int fscrypt_prepare_readdir(struct inode *dir) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_readdir(dir); + return 0; +} + /** * fscrypt_prepare_setattr() - prepare to change a possibly-encrypted inode's * attributes -- cgit v1.2.3 From 7622350e5eda2cc57a72c6b27f1405d8b4f94670 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:38 -0800 Subject: fscrypt: move body of fscrypt_prepare_setattr() out-of-line In preparation for reducing the visibility of fscrypt_require_key() by moving it to fscrypt_private.h, move the call to it from fscrypt_prepare_setattr() to an out-of-line function. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8cbb26f55695..b20900bb829f 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -243,6 +243,7 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); int __fscrypt_prepare_readdir(struct inode *dir); +int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); int fscrypt_prepare_symlink(struct inode *dir, const char *target, @@ -543,6 +544,12 @@ static inline int __fscrypt_prepare_readdir(struct inode *dir) return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) @@ -840,8 +847,8 @@ static inline int fscrypt_prepare_readdir(struct inode *dir) static inline int fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { - if (attr->ia_valid & ATTR_SIZE) - return fscrypt_require_key(d_inode(dentry)); + if (IS_ENCRYPTED(d_inode(dentry))) + return __fscrypt_prepare_setattr(dentry, attr); return 0; } -- cgit v1.2.3 From de3cdc6e75179a2324c23400b21483a1372c95e1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:39 -0800 Subject: fscrypt: move fscrypt_require_key() to fscrypt_private.h fscrypt_require_key() is now only used by files in fs/crypto/. So reduce its visibility to fscrypt_private.h. This is also a prerequsite for unexporting fscrypt_get_encryption_info(). Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b20900bb829f..a07610f27926 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -688,32 +688,6 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return fscrypt_get_info(inode) != NULL; } -/** - * fscrypt_require_key() - require an inode's encryption key - * @inode: the inode we need the key for - * - * If the inode is encrypted, set up its encryption key if not already done. - * Then require that the key be present and return -ENOKEY otherwise. - * - * No locks are needed, and the key will live as long as the struct inode --- so - * it won't go away from under you. - * - * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code - * if a problem occurred while setting up the encryption key. - */ -static inline int fscrypt_require_key(struct inode *inode) -{ - if (IS_ENCRYPTED(inode)) { - int err = fscrypt_get_encryption_info(inode); - - if (err) - return err; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - return 0; -} - /** * fscrypt_prepare_link() - prepare to link an inode into a possibly-encrypted * directory -- cgit v1.2.3 From 5b421f08801fe8247dec368b3d323958f419e769 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:40 -0800 Subject: fscrypt: unexport fscrypt_get_encryption_info() Now that fscrypt_get_encryption_info() is only called from files in fs/crypto/ (due to all key setup now being handled by higher-level helper functions instead of directly by filesystems), unexport it and move its declaration to fscrypt_private.h. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a07610f27926..4b163f5e58e9 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -75,7 +75,7 @@ struct fscrypt_operations { static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) { /* - * Pairs with the cmpxchg_release() in fscrypt_get_encryption_info(). + * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). * I.e., another task may publish ->i_crypt_info concurrently, executing * a RELEASE barrier. We need to use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. @@ -200,7 +200,6 @@ int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); /* keysetup.c */ -int fscrypt_get_encryption_info(struct inode *inode); int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret); void fscrypt_put_encryption_info(struct inode *inode); @@ -408,10 +407,6 @@ static inline int fscrypt_ioctl_get_key_status(struct file *filp, } /* keysetup.c */ -static inline int fscrypt_get_encryption_info(struct inode *inode) -{ - return -EOPNOTSUPP; -} static inline int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, -- cgit v1.2.3 From a14d0b6764917b21ee6fdfd2a8a4c2920fbefcce Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Dec 2020 18:20:41 -0800 Subject: fscrypt: allow deleting files with unsupported encryption policy Currently it's impossible to delete files that use an unsupported encryption policy, as the kernel will just return an error when performing any operation on the top-level encrypted directory, even just a path lookup into the directory or opening the directory for readdir. More specifically, this occurs in any of the following cases: - The encryption context has an unrecognized version number. Current kernels know about v1 and v2, but there could be more versions in the future. - The encryption context has unrecognized encryption modes (FSCRYPT_MODE_*) or flags (FSCRYPT_POLICY_FLAG_*), an unrecognized combination of modes, or reserved bits set. - The encryption key has been added and the encryption modes are recognized but aren't available in the crypto API -- for example, a directory is encrypted with FSCRYPT_MODE_ADIANTUM but the kernel doesn't have CONFIG_CRYPTO_ADIANTUM enabled. It's desirable to return errors for most operations on files that use an unsupported encryption policy, but the current behavior is too strict. We need to allow enough to delete files, so that people can't be stuck with undeletable files when downgrading kernel versions. That includes allowing directories to be listed and allowing dentries to be looked up. Fix this by modifying the key setup logic to treat an unsupported encryption policy in the same way as "key unavailable" in the cases that are required for a recursive delete to work: preparing for a readdir or a dentry lookup, revalidating a dentry, or checking whether an inode has the same encryption policy as its parent directory. Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201203022041.230976-10-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 4b163f5e58e9..d23156d1ac94 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -753,8 +753,9 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * * Prepare for ->lookup() in a directory which may be encrypted by determining * the name that will actually be used to search the directory on-disk. If the - * directory's encryption key is available, then the lookup is assumed to be by - * plaintext name; otherwise, it is assumed to be by no-key name. + * directory's encryption policy is supported by this kernel and its encryption + * key is available, then the lookup is assumed to be by plaintext name; + * otherwise, it is assumed to be by no-key name. * * This also installs a custom ->d_revalidate() method which will invalidate the * dentry if it was created without the key and the key is later added. @@ -786,7 +787,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, * form rather than in no-key form. * * Return: 0 on success; -errno on error. Note that the encryption key being - * unavailable is not considered an error. + * unavailable is not considered an error. It is also not an error if + * the encryption policy is unsupported by this kernel; that is treated + * like the key being unavailable, so that files can still be deleted. */ static inline int fscrypt_prepare_readdir(struct inode *dir) { -- cgit v1.2.3 From bcfe06bf2622f7c4899468e427683aec49070687 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:27 -0800 Subject: mm: memcontrol: Use helpers to read page's memcg data Patch series "mm: allow mapping accounted kernel pages to userspace", v6. Currently a non-slab kernel page which has been charged to a memory cgroup can't be mapped to userspace. The underlying reason is simple: PageKmemcg flag is defined as a page type (like buddy, offline, etc), so it takes a bit from a page->mapped counter. Pages with a type set can't be mapped to userspace. But in general the kmemcg flag has nothing to do with mapping to userspace. It only means that the page has been accounted by the page allocator, so it has to be properly uncharged on release. Some bpf maps are mapping the vmalloc-based memory to userspace, and their memory can't be accounted because of this implementation detail. This patchset removes this limitation by moving the PageKmemcg flag into one of the free bits of the page->mem_cgroup pointer. Also it formalizes accesses to the page->mem_cgroup and page->obj_cgroups using new helpers, adds several checks and removes a couple of obsolete functions. As the result the code became more robust with fewer open-coded bit tricks. This patch (of 4): Currently there are many open-coded reads of the page->mem_cgroup pointer, as well as a couple of read helpers, which are barely used. It creates an obstacle on a way to reuse some bits of the pointer for storing additional bits of information. In fact, we already do this for slab pages, where the last bit indicates that a pointer has an attached vector of objcg pointers instead of a regular memcg pointer. This commits uses 2 existing helpers and introduces a new helper to converts all read sides to calls of these helpers: struct mem_cgroup *page_memcg(struct page *page); struct mem_cgroup *page_memcg_rcu(struct page *page); struct mem_cgroup *page_memcg_check(struct page *page); page_memcg_check() is intended to be used in cases when the page can be a slab page and have a memcg pointer pointing at objcg vector. It does check the lowest bit, and if set, returns NULL. page_memcg() contains a VM_BUG_ON_PAGE() check for the page not being a slab page. To make sure nobody uses a direct access, struct page's mem_cgroup/obj_cgroups is converted to unsigned long memcg_data. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/20201027001657.3398190-1-guro@fb.com Link: https://lkml.kernel.org/r/20201027001657.3398190-2-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-2-guro@fb.com --- include/linux/memcontrol.h | 114 +++++++++++++++++++++++++++++++++++++++++---- include/linux/mm.h | 22 --------- include/linux/mm_types.h | 5 +- 3 files changed, 106 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..f95c1433461c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -343,6 +343,79 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +/* + * page_memcg - get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages. + * + * Any of the following ensures page and memcg binding stability: + * - the page lock + * - LRU isolation + * - lock_page_memcg() + * - exclusive reference + */ +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + VM_BUG_ON_PAGE(PageSlab(page), page); + return (struct mem_cgroup *)page->memcg_data; +} + +/* + * page_memcg_rcu - locklessly get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages. + */ +static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +{ + VM_BUG_ON_PAGE(PageSlab(page), page); + WARN_ON_ONCE(!rcu_read_lock_held()); + + return (struct mem_cgroup *)READ_ONCE(page->memcg_data); +} + +/* + * page_memcg_check - get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function unlike page_memcg() can take any page + * as an argument. It has to be used in cases when it's not known if a page + * has an associated memory cgroup pointer or an object cgroups vector. + * + * Any of the following ensures page and memcg binding stability: + * - the page lock + * - LRU isolation + * - lock_page_memcg() + * - exclusive reference + */ +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + /* + * Because page->memcg_data might be changed asynchronously + * for slab pages, READ_ONCE() should be used here. + */ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + /* + * The lowest bit set means that memcg isn't a valid + * memcg pointer, but a obj_cgroups pointer. + * In this case the page is shared and doesn't belong + * to any specific memory cgroup. + */ + if (memcg_data & 0x1UL) + return NULL; + + return (struct mem_cgroup *)memcg_data; +} + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) @@ -743,15 +816,19 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, static inline void __mod_memcg_page_state(struct page *page, int idx, int val) { - if (page->mem_cgroup) - __mod_memcg_state(page->mem_cgroup, idx, val); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + __mod_memcg_state(memcg, idx, val); } static inline void mod_memcg_page_state(struct page *page, int idx, int val) { - if (page->mem_cgroup) - mod_memcg_state(page->mem_cgroup, idx, val); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + mod_memcg_state(memcg, idx, val); } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, @@ -834,16 +911,17 @@ static inline void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { struct page *head = compound_head(page); /* rmap on tail pages */ + struct mem_cgroup *memcg = page_memcg(head); pg_data_t *pgdat = page_pgdat(page); struct lruvec *lruvec; /* Untracked pages have no memcg, no lruvec. Update only the node */ - if (!head->mem_cgroup) { + if (!memcg) { __mod_node_page_state(pgdat, idx, val); return; } - lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat); + lruvec = mem_cgroup_lruvec(memcg, pgdat); __mod_lruvec_state(lruvec, idx, val); } @@ -878,8 +956,10 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, static inline void count_memcg_page_event(struct page *page, enum vm_event_item idx) { - if (page->mem_cgroup) - count_memcg_events(page->mem_cgroup, idx, 1); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + count_memcg_events(memcg, idx, 1); } static inline void count_memcg_event_mm(struct mm_struct *mm, @@ -941,6 +1021,22 @@ void mem_cgroup_split_huge_fixup(struct page *head); struct mem_cgroup; +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return NULL; +} + +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + return NULL; +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return true; @@ -1430,7 +1526,7 @@ static inline void mem_cgroup_track_foreign_dirty(struct page *page, if (mem_cgroup_disabled()) return; - if (unlikely(&page->mem_cgroup->css != wb->memcg_css)) + if (unlikely(&page_memcg(page)->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(page, wb); } diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..6b0c9d2c1d10 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1484,28 +1484,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } -#ifdef CONFIG_MEMCG -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return page->mem_cgroup; -} -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return READ_ONCE(page->mem_cgroup); -} -#else -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return NULL; -} -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return NULL; -} -#endif - /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5a9238f6caad..80f5d755c037 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -199,10 +199,7 @@ struct page { atomic_t _refcount; #ifdef CONFIG_MEMCG - union { - struct mem_cgroup *mem_cgroup; - struct obj_cgroup **obj_cgroups; - }; + unsigned long memcg_data; #endif /* -- cgit v1.2.3 From 270c6a71460e12b07b1dcadf7457ff95b6c6e8f4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:28 -0800 Subject: mm: memcontrol/slab: Use helpers to access slab page's memcg_data To gather all direct accesses to struct page's memcg_data field in one place, let's introduce 3 new helpers to use in the slab accounting code: struct obj_cgroup **page_objcgs(struct page *page); struct obj_cgroup **page_objcgs_check(struct page *page); bool set_page_objcgs(struct page *page, struct obj_cgroup **objcgs); They are similar to the corresponding API for generic pages, except that the setter can return false, indicating that the value has been already set from a different thread. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20201027001657.3398190-3-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-3-guro@fb.com --- include/linux/memcontrol.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f95c1433461c..c7ac0a5b8989 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -416,6 +416,70 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)memcg_data; } +#ifdef CONFIG_MEMCG_KMEM +/* + * page_objcgs - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function assumes that the page is known to have an + * associated object cgroups vector. It's not safe to call this function + * against pages, which might have an associated memory cgroup: e.g. + * kernel stack pages. + */ +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return (struct obj_cgroup **)(READ_ONCE(page->memcg_data) & ~0x1UL); +} + +/* + * page_objcgs_check - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function is safe to use if the page can be directly associated + * with a memory cgroup. + */ +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + if (memcg_data && (memcg_data & 0x1UL)) + return (struct obj_cgroup **)(memcg_data & ~0x1UL); + + return NULL; +} + +/* + * set_page_objcgs - associate a page with a object cgroups vector + * @page: a pointer to the page struct + * @objcgs: a pointer to the object cgroups vector + * + * Atomically associates a page with a vector of object cgroups. + */ +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL); +} +#else +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return NULL; +} + +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + return NULL; +} + +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return true; +} +#endif + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) -- cgit v1.2.3 From 87944e2992bd28098c6806086a1e96bb4d0e502b Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:29 -0800 Subject: mm: Introduce page memcg flags The lowest bit in page->memcg_data is used to distinguish between struct memory_cgroup pointer and a pointer to a objcgs array. All checks and modifications of this bit are open-coded. Let's formalize it using page memcg flags, defined in enum page_memcg_data_flags. Additional flags might be added later. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/20201027001657.3398190-4-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-4-guro@fb.com --- include/linux/memcontrol.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c7ac0a5b8989..99a4841d658b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -343,6 +343,15 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +enum page_memcg_data_flags { + /* page->memcg_data is a pointer to an objcgs vector */ + MEMCG_DATA_OBJCGS = (1UL << 0), + /* the next bit after the last actual flag */ + __NR_MEMCG_DATA_FLAGS = (1UL << 1), +}; + +#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) + /* * page_memcg - get the memory cgroup associated with a page * @page: a pointer to the page struct @@ -404,13 +413,7 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) */ unsigned long memcg_data = READ_ONCE(page->memcg_data); - /* - * The lowest bit set means that memcg isn't a valid - * memcg pointer, but a obj_cgroups pointer. - * In this case the page is shared and doesn't belong - * to any specific memory cgroup. - */ - if (memcg_data & 0x1UL) + if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; return (struct mem_cgroup *)memcg_data; @@ -429,7 +432,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) */ static inline struct obj_cgroup **page_objcgs(struct page *page) { - return (struct obj_cgroup **)(READ_ONCE(page->memcg_data) & ~0x1UL); + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); + + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -444,10 +451,10 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) { unsigned long memcg_data = READ_ONCE(page->memcg_data); - if (memcg_data && (memcg_data & 0x1UL)) - return (struct obj_cgroup **)(memcg_data & ~0x1UL); + if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) + return NULL; - return NULL; + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -460,7 +467,8 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) static inline bool set_page_objcgs(struct page *page, struct obj_cgroup **objcgs) { - return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL); + return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | + MEMCG_DATA_OBJCGS); } #else static inline struct obj_cgroup **page_objcgs(struct page *page) -- cgit v1.2.3 From 18b2db3b0385226b71cb3288474fa5a6e4a45474 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:30 -0800 Subject: mm: Convert page kmemcg type to a page memcg flag PageKmemcg flag is currently defined as a page type (like buddy, offline, table and guard). Semantically it means that the page was accounted as a kernel memory by the page allocator and has to be uncharged on the release. As a side effect of defining the flag as a page type, the accounted page can't be mapped to userspace (look at page_has_type() and comments above). In particular, this blocks the accounting of vmalloc-backed memory used by some bpf maps, because these maps do map the memory to userspace. One option is to fix it by complicating the access to page->mapcount, which provides some free bits for page->page_type. But it's way better to move this flag into page->memcg_data flags. Indeed, the flag makes no sense without enabled memory cgroups and memory cgroup pointer set in particular. This commit replaces PageKmemcg() and __SetPageKmemcg() with PageMemcgKmem() and an open-coded OR operation setting the memcg pointer with the MEMCG_DATA_KMEM bit. __ClearPageKmemcg() can be simple deleted, as the whole memcg_data is zeroed at once. As a bonus, on !CONFIG_MEMCG build the PageMemcgKmem() check will be compiled out. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/20201027001657.3398190-5-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-5-guro@fb.com --- include/linux/memcontrol.h | 37 +++++++++++++++++++++++++++++++++---- include/linux/page-flags.h | 11 ++--------- 2 files changed, 35 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 99a4841d658b..7c9d43476166 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -346,8 +346,10 @@ extern struct mem_cgroup *root_mem_cgroup; enum page_memcg_data_flags { /* page->memcg_data is a pointer to an objcgs vector */ MEMCG_DATA_OBJCGS = (1UL << 0), + /* page has been accounted as a non-slab kernel page */ + MEMCG_DATA_KMEM = (1UL << 1), /* the next bit after the last actual flag */ - __NR_MEMCG_DATA_FLAGS = (1UL << 1), + __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) @@ -369,8 +371,12 @@ enum page_memcg_data_flags { */ static inline struct mem_cgroup *page_memcg(struct page *page) { + unsigned long memcg_data = page->memcg_data; + VM_BUG_ON_PAGE(PageSlab(page), page); - return (struct mem_cgroup *)page->memcg_data; + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -387,7 +393,8 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) VM_BUG_ON_PAGE(PageSlab(page), page); WARN_ON_ONCE(!rcu_read_lock_held()); - return (struct mem_cgroup *)READ_ONCE(page->memcg_data); + return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) & + ~MEMCG_DATA_FLAGS_MASK); } /* @@ -416,7 +423,21 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; - return (struct mem_cgroup *)memcg_data; + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * PageMemcgKmem - check if the page has MemcgKmem flag set + * @page: a pointer to the page struct + * + * Checks if the page has MemcgKmem flag set. The caller must ensure that + * the page has an associated memory cgroup. It's not safe to call this function + * against some types of pages, e.g. slab pages. + */ +static inline bool PageMemcgKmem(struct page *page) +{ + VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); + return page->memcg_data & MEMCG_DATA_KMEM; } #ifdef CONFIG_MEMCG_KMEM @@ -435,6 +456,7 @@ static inline struct obj_cgroup **page_objcgs(struct page *page) unsigned long memcg_data = READ_ONCE(page->memcg_data); VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -454,6 +476,8 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) return NULL; + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -1109,6 +1133,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline bool PageMemcgKmem(struct page *page) +{ + return false; +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return true; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4f6ba9379112..fc0e1bd48e73 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -715,9 +715,8 @@ PAGEFLAG_FALSE(DoubleMap) #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 -#define PG_kmemcg 0x00000200 -#define PG_table 0x00000400 -#define PG_guard 0x00000800 +#define PG_table 0x00000200 +#define PG_guard 0x00000400 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -768,12 +767,6 @@ PAGE_TYPE_OPS(Buddy, buddy) */ PAGE_TYPE_OPS(Offline, offline) -/* - * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on - * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. - */ -PAGE_TYPE_OPS(Kmemcg, kmemcg) - /* * Marks pages in use as page tables. */ -- cgit v1.2.3 From 48edc1f78aabeba35ed00e40c36f211de89e0090 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:32 -0800 Subject: bpf: Prepare for memcg-based memory accounting for bpf maps Bpf maps can be updated from an interrupt context and in such case there is no process which can be charged. It makes the memory accounting of bpf maps non-trivial. Fortunately, after commit 4127c6504f25 ("mm: kmem: enable kernel memcg accounting from interrupt contexts") and commit b87d8cefe43c ("mm, memcg: rework remote charging API to support nesting") it's finally possible. To make the ownership model simple and consistent, when the map is created, the memory cgroup of the current process is recorded. All subsequent allocations related to the bpf map are charged to the same memory cgroup. It includes allocations made by any processes (even if they do belong to a different cgroup) and from interrupts. This commit introduces 3 new helpers, which will be used by following commits to enable the accounting of bpf maps memory: - bpf_map_kmalloc_node() - bpf_map_kzalloc() - bpf_map_alloc_percpu() They are wrapping popular memory allocation functions. They set the active memory cgroup to the map's memory cgroup and add __GFP_ACCOUNT to the passed gfp flags. Then they call into the corresponding memory allocation function and restore the original active memory cgroup. These helpers are supposed to use everywhere except the map creation path. During the map creation when the map structure is allocated by itself, it cannot be passed to those helpers. In those cases default memory allocation function will be used with the __GFP_ACCOUNT flag. Signed-off-by: Roman Gushchin Acked-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201201215900.3569844-7-guro@fb.com --- include/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1bcb6d7345c..e1f2c95c15ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -37,6 +39,7 @@ struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; +struct mem_cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -161,6 +164,9 @@ struct bpf_map { u32 btf_value_type_id; struct btf *btf; struct bpf_map_memory memory; +#ifdef CONFIG_MEMCG_KMEM + struct mem_cgroup *memcg; +#endif char name[BPF_OBJ_NAME_LEN]; u32 btf_vmlinux_value_type_id; bool bypass_spec_v1; @@ -1240,6 +1246,34 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +#ifdef CONFIG_MEMCG_KMEM +void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, + int node); +void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); +void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, + size_t align, gfp_t flags); +#else +static inline void * +bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, + int node) +{ + return kmalloc_node(size, flags, node); +} + +static inline void * +bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) +{ + return kzalloc(size, flags); +} + +static inline void __percpu * +bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, + gfp_t flags) +{ + return __alloc_percpu_gfp(size, align, flags); +} +#endif + extern int sysctl_unprivileged_bpf_disabled; static inline bool bpf_allow_ptr_leaks(void) -- cgit v1.2.3 From 80ee81e0403c48f4eb342f7c8d40477c89b8836a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:58 -0800 Subject: bpf: Eliminate rlimit-based memory accounting infra for bpf maps Remove rlimit-based accounting infrastructure code, which is not used anymore. To provide a backward compatibility, use an approximation of the bpf map memory footprint as a "memlock" value, available to a user via map info. The approximation is based on the maximal number of elements and key and value sizes. Signed-off-by: Roman Gushchin Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201201215900.3569844-33-guro@fb.com --- include/linux/bpf.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1f2c95c15ec..61331a148cde 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -138,11 +138,6 @@ struct bpf_map_ops { const struct bpf_iter_seq_info *iter_seq_info; }; -struct bpf_map_memory { - u32 pages; - struct user_struct *user; -}; - struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -163,7 +158,6 @@ struct bpf_map { u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; - struct bpf_map_memory memory; #ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -1224,12 +1218,6 @@ void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); -int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); -void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); -void bpf_map_charge_finish(struct bpf_map_memory *mem); -void bpf_map_charge_move(struct bpf_map_memory *dst, - struct bpf_map_memory *src); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); -- cgit v1.2.3 From 3ac1f01b43b6e2759cc34d3a715ba5eed04c5805 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:59 -0800 Subject: bpf: Eliminate rlimit-based memory accounting for bpf progs Do not use rlimit-based memory accounting for bpf progs. It has been replaced with memcg-based memory accounting. Signed-off-by: Roman Gushchin Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201201215900.3569844-34-guro@fb.com --- include/linux/bpf.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 61331a148cde..a9de5711b23f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1202,8 +1202,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i); void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -int __bpf_prog_charge(struct user_struct *user, u32 pages); -void __bpf_prog_uncharge(struct user_struct *user, u32 pages); void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len); @@ -1512,15 +1510,6 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog) return ERR_PTR(-EOPNOTSUPP); } -static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) -{ - return 0; -} - -static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) -{ -} - static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) -- cgit v1.2.3 From 608af703519a58f5a7da4273809211cac27edfb2 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Nov 2020 06:09:02 +0000 Subject: libfs: Add generic function for setting dentry_ops This adds a function to set dentry operations at lookup time that will work for both encrypted filenames and casefolded filenames. A filesystem that supports both features simultaneously can use this function during lookup preparations to set up its dentry operations once fscrypt no longer does that itself. Currently the casefolding dentry operation are always set if the filesystem defines an encoding because the features is toggleable on empty directories. Unlike in the encryption case, the dentry operations used come from the parent. Since we don't know what set of functions we'll eventually need, and cannot change them later, we enable the casefolding operations if the filesystem supports them at all. By splitting out the various cases, we support as few dentry operations as we can get away with, maximizing compatibility with overlayfs, which will not function if a filesystem supports certain dentry_operations. Signed-off-by: Daniel Rosenberg Reviewed-by: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jaegeuk Kim --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 21cc971fd960..4a25ab4dbd3e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3186,6 +3186,7 @@ extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); #endif +extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, -- cgit v1.2.3 From bb9cd9106b22b4fc5ff8d78a752be8a4ba2cbba5 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Nov 2020 06:09:03 +0000 Subject: fscrypt: Have filesystems handle their d_ops This shifts the responsibility of setting up dentry operations from fscrypt to the individual filesystems, allowing them to have their own operations while still setting fscrypt's d_revalidate as appropriate. Most filesystems can just use generic_set_encrypted_ci_d_ops, unless they have their own specific dentry operations as well. That operation will set the minimal d_ops required under the circumstances. Since the fscrypt d_ops are set later on, we must set all d_ops there, since we cannot adjust those later on. This should not result in any change in behavior. Signed-off-by: Daniel Rosenberg Acked-by: Theodore Ts'o Acked-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- include/linux/fscrypt.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a8f7a43f031b..e72f80482671 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -741,8 +741,11 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * directory's encryption key is available, then the lookup is assumed to be by * plaintext name; otherwise, it is assumed to be by no-key name. * - * This also installs a custom ->d_revalidate() method which will invalidate the - * dentry if it was created without the key and the key is later added. + * This will set DCACHE_NOKEY_NAME on the dentry if the lookup is by no-key + * name. In this case the filesystem must assign the dentry a dentry_operations + * which contains fscrypt_d_revalidate (or contains a d_revalidate method that + * calls fscrypt_d_revalidate), so that the dentry will be invalidated if the + * directory's encryption key is later added. * * Return: 0 on success; -ENOENT if the directory's key is unavailable but the * filename isn't a valid no-key name, so a negative dentry should be created; -- cgit v1.2.3 From b28f047b28c51d0b9864c34b097bb0b221ea7247 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Nov 2020 18:32:09 +0800 Subject: f2fs: compress: support chksum This patch supports to store chksum value with compressed data, and verify the integrality of compressed data while reading the data. The feature can be enabled through specifying mount option 'compress_chksum'. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5dbb57a687f..7dc2a06cf19a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -273,7 +273,7 @@ struct f2fs_inode { __le64 i_compr_blocks; /* # of compressed blocks */ __u8 i_compress_algorithm; /* compress algorithm */ __u8 i_log_cluster_size; /* log of cluster size */ - __le16 i_padding; /* padding */ + __le16 i_compress_flag; /* compress flag */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ -- cgit v1.2.3 From 41a340941854c4606a9b71b9d68db412747e7c84 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 2 Dec 2020 15:13:26 +0100 Subject: media: coda: Convert the driver to DT-only Since 5.10-rc1 i.MX is a devicetree-only platform, so simplify the code by removing the unused non-DT support. Signed-off-by: Fabio Estevam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/coda.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/platform_data/media/coda.h (limited to 'include/linux') diff --git a/include/linux/platform_data/media/coda.h b/include/linux/platform_data/media/coda.h deleted file mode 100644 index 293b61b60c9d..000000000000 --- a/include/linux/platform_data/media/coda.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2013 Philipp Zabel, Pengutronix - */ -#ifndef PLATFORM_CODA_H -#define PLATFORM_CODA_H - -struct device; - -struct coda_platform_data { - struct device *iram_dev; -}; - -#endif -- cgit v1.2.3 From 2a4a06da8a4b93dd189171eed7a99fffd38f42f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Nov 2020 11:41:40 +0100 Subject: mm/gup: Provide gup_get_pte() more generic In order to write another lockless page-table walker, we need gup_get_pte() exposed. While doing that, rename it to ptep_get_lockless() to match the existing ptep_get() naming. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201126121121.036370527@infradead.org --- include/linux/pgtable.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 71125a4676c4..ed9266cc115b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -258,6 +258,61 @@ static inline pte_t ptep_get(pte_t *ptep) } #endif +#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH +/* + * WARNING: only to be used in the get_user_pages_fast() implementation. + * + * With get_user_pages_fast(), we walk down the pagetables without taking any + * locks. For this we would like to load the pointers atomically, but sometimes + * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What + * we do have is the guarantee that a PTE will only either go from not present + * to present, or present to not present or both -- it will not switch to a + * completely different present page without a TLB flush in between; something + * that we are blocking by holding interrupts off. + * + * Setting ptes from not present to present goes: + * + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'. + * We load pte_high *after* loading pte_low, which ensures we don't see an older + * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't + * picked up a changed pte high. We might have gotten rubbish values from + * pte_low and pte_high, but we are guaranteed that pte_low will not have the + * present bit set *unless* it is 'l'. Because get_user_pages_fast() only + * operates on present ptes we're safe. + */ +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + pte_t pte; + + do { + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + } while (unlikely(pte.pte_low != ptep->pte_low)); + + return pte; +} +#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +/* + * We require that the PTE can be read atomically. + */ +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + return ptep_get(ptep); +} +#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, -- cgit v1.2.3 From 560dabbdf68bb15f9e241af8f828b1c8c38d6c6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Nov 2020 11:45:36 +0100 Subject: mm: Introduce pXX_leaf_size() A number of architectures have non-pagetable aligned huge/large pages. For such architectures a leaf can actually be part of a larger entry. Provide generic helpers to determine the size of a page-table leaf. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matthew Wilcox (Oracle) Link: https://lkml.kernel.org/r/20201126121121.102580109@infradead.org --- include/linux/pgtable.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ed9266cc115b..fefbbdbbb3f3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1536,4 +1536,20 @@ typedef unsigned int pgtbl_mod_mask; #define pmd_leaf(x) 0 #endif +#ifndef pgd_leaf_size +#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT) +#endif +#ifndef p4d_leaf_size +#define p4d_leaf_size(x) P4D_SIZE +#endif +#ifndef pud_leaf_size +#define pud_leaf_size(x) PUD_SIZE +#endif +#ifndef pmd_leaf_size +#define pmd_leaf_size(x) PMD_SIZE +#endif +#ifndef pte_leaf_size +#define pte_leaf_size(x) PAGE_SIZE +#endif + #endif /* _LINUX_PGTABLE_H */ -- cgit v1.2.3 From a07c45312f06e288417049208c344ad76074627d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:50:38 +0100 Subject: seqlock: avoid -Wshadow warnings When building with W=2, there is a flood of warnings about the seqlock macros shadowing local variables: 19806 linux/seqlock.h:331:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] 48 linux/seqlock.h:348:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] 8 linux/seqlock.h:379:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] Prefix the local variables to make the warning useful elsewhere again. Fixes: 52ac39e5db51 ("seqlock: seqcount_t: Implement all read APIs as statement expressions") Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201026165044.3722931-1-arnd@kernel.org --- include/linux/seqlock.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index cbfc78b92b65..8d8552474c64 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -328,13 +328,13 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define __read_seqcount_begin(s) \ ({ \ - unsigned seq; \ + unsigned __seq; \ \ - while ((seq = __seqcount_sequence(s)) & 1) \ + while ((__seq = __seqcount_sequence(s)) & 1) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ - seq; \ + __seq; \ }) /** @@ -345,10 +345,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount_begin(s) \ ({ \ - unsigned seq = __read_seqcount_begin(s); \ + unsigned _seq = __read_seqcount_begin(s); \ \ smp_rmb(); \ - seq; \ + _seq; \ }) /** @@ -376,11 +376,11 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount(s) \ ({ \ - unsigned seq = __seqcount_sequence(s); \ + unsigned __seq = __seqcount_sequence(s); \ \ smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ - seq; \ + __seq; \ }) /** -- cgit v1.2.3 From ab440b2c604b60fe90885270fcfeb5c3dd5d6fae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Nov 2020 13:44:17 +0100 Subject: seqlock: Rename __seqprop() users More consistent naming should make it easier to untangle the _Generic token pasting maze called __seqprop(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201110115358.GE2594@hirez.programming.kicks-ass.net --- include/linux/seqlock.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8d8552474c64..d89134c74fba 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -307,10 +307,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu __seqprop_case((s), mutex, prop), \ __seqprop_case((s), ww_mutex, prop)) -#define __seqcount_ptr(s) __seqprop(s, ptr) -#define __seqcount_sequence(s) __seqprop(s, sequence) -#define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) -#define __seqcount_assert_lock_held(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr) +#define seqprop_sequence(s) __seqprop(s, sequence) +#define seqprop_preemptible(s) __seqprop(s, preemptible) +#define seqprop_assert(s) __seqprop(s, assert) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier @@ -330,7 +330,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu ({ \ unsigned __seq; \ \ - while ((__seq = __seqcount_sequence(s)) & 1) \ + while ((__seq = seqprop_sequence(s)) & 1) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ @@ -359,7 +359,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(__seqcount_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -376,7 +376,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount(s) \ ({ \ - unsigned __seq = __seqcount_sequence(s); \ + unsigned __seq = seqprop_sequence(s); \ \ smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ @@ -425,7 +425,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - __read_seqcount_t_retry(__seqcount_ptr(s), start) + __read_seqcount_t_retry(seqprop_ptr(s), start) static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -445,7 +445,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - read_seqcount_t_retry(__seqcount_ptr(s), start) + read_seqcount_t_retry(seqprop_ptr(s), start) static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -459,10 +459,10 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) */ #define raw_write_seqcount_begin(s) \ do { \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - raw_write_seqcount_t_begin(__seqcount_ptr(s)); \ + raw_write_seqcount_t_begin(seqprop_ptr(s)); \ } while (0) static inline void raw_write_seqcount_t_begin(seqcount_t *s) @@ -478,9 +478,9 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) */ #define raw_write_seqcount_end(s) \ do { \ - raw_write_seqcount_t_end(__seqcount_ptr(s)); \ + raw_write_seqcount_t_end(seqprop_ptr(s)); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -501,12 +501,12 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s) */ #define write_seqcount_begin_nested(s, subclass) \ do { \ - __seqcount_assert_lock_held(s); \ + seqprop_assert(s); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \ + write_seqcount_t_begin_nested(seqprop_ptr(s), subclass); \ } while (0) static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) @@ -528,12 +528,12 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) */ #define write_seqcount_begin(s) \ do { \ - __seqcount_assert_lock_held(s); \ + seqprop_assert(s); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin(__seqcount_ptr(s)); \ + write_seqcount_t_begin(seqprop_ptr(s)); \ } while (0) static inline void write_seqcount_t_begin(seqcount_t *s) @@ -549,9 +549,9 @@ static inline void write_seqcount_t_begin(seqcount_t *s) */ #define write_seqcount_end(s) \ do { \ - write_seqcount_t_end(__seqcount_ptr(s)); \ + write_seqcount_t_end(seqprop_ptr(s)); \ \ - if (__seqcount_lock_preemptible(s)) \ + if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -603,7 +603,7 @@ static inline void write_seqcount_t_end(seqcount_t *s) * } */ #define raw_write_seqcount_barrier(s) \ - raw_write_seqcount_t_barrier(__seqcount_ptr(s)) + raw_write_seqcount_t_barrier(seqprop_ptr(s)) static inline void raw_write_seqcount_t_barrier(seqcount_t *s) { @@ -623,7 +623,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ - write_seqcount_t_invalidate(__seqcount_ptr(s)) + write_seqcount_t_invalidate(seqprop_ptr(s)) static inline void write_seqcount_t_invalidate(seqcount_t *s) { -- cgit v1.2.3 From b6498aad59b091e5618a9f05e7636e2ad2c6732d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Dec 2020 13:09:00 +0100 Subject: completion: Drop init_completion define Changeset cd8084f91c02 ("locking/lockdep: Apply crossrelease to completions") added a CONFIG_LOCKDEP_COMPLETE (that was later renamed to CONFIG_LOCKDEP_COMPLETIONS). Such changeset renamed the init_completion, and add a macro that would either run a modified version or the original code. However, such code reported too many false positives. So, it ended being dropped later on by changeset e966eaeeb623 ("locking/lockdep: Remove the cross-release locking checks"). Yet, the define remained there as just: #define init_completion(x) __init_completion(x) Get rid of the define, and return __init_completion() function to its original name. Fixes: e966eaeeb623 ("locking/lockdep: Remove the cross-release locking checks") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/e657bfc533545c185b1c3c55926a449ead56a88b.1606823973.git.mchehab+huawei@kernel.org --- include/linux/completion.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index bf8e77001f18..51d9ab079629 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -28,8 +28,7 @@ struct completion { struct swait_queue_head wait; }; -#define init_completion_map(x, m) __init_completion(x) -#define init_completion(x) __init_completion(x) +#define init_completion_map(x, m) init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} @@ -82,7 +81,7 @@ static inline void complete_release(struct completion *x) {} * This inline function will initialize a dynamically created completion * structure. */ -static inline void __init_completion(struct completion *x) +static inline void init_completion(struct completion *x) { x->done = 0; init_swait_queue_head(&x->wait); -- cgit v1.2.3 From 97d62caa32d6d79dadae3f8d19af5c92ea9a589a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Dec 2020 13:09:08 +0100 Subject: refcount: Fix a kernel-doc markup The kernel-doc markup is wrong: it is asking the tool to document struct refcount_struct, instead of documenting typedef refcount_t. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Peter Zijlstra (Intel) Acked-by: Kees Cook Link: https://lkml.kernel.org/r/afb9bb1e675bf5f72a34a55d780779d7d5916b4c.1606823973.git.mchehab+huawei@kernel.org --- include/linux/refcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 497990c69b0b..b8a6e387f8f9 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -101,7 +101,7 @@ struct mutex; /** - * struct refcount_t - variant of atomic_t specialized for reference counts + * typedef refcount_t - variant of atomic_t specialized for reference counts * @refs: atomic_t counter field * * The counter saturates at REFCOUNT_SATURATED and will not move once -- cgit v1.2.3 From 950cc0d2bef078e1f6459900ca4d4b2a2e0e3c37 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 2 Dec 2020 14:07:07 +0200 Subject: fsnotify: generalize handle_inode_event() The handle_inode_event() interface was added as (quoting comment): "a simple variant of handle_event() for groups that only have inode marks and don't have ignore mask". In other words, all backends except fanotify. The inotify backend also falls under this category, but because it required extra arguments it was left out of the initial pass of backends conversion to the simple interface. This results in code duplication between the generic helper fsnotify_handle_event() and the inotify_handle_event() callback which also happen to be buggy code. Generalize the handle_inode_event() arguments and add the check for FS_EXCL_UNLINK flag to the generic helper, so inotify backend could be converted to use the simple interface. Link: https://lore.kernel.org/r/20201202120713.702387-2-amir73il@gmail.com CC: stable@vger.kernel.org Fixes: b9a1b9772509 ("fsnotify: create method handle_inode_event() in fsnotify_operations") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index f8529a3a2923..4ee3044eedd0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -137,6 +137,7 @@ struct mem_cgroup; * if @file_name is not NULL, this is the directory that * @file_name is relative to. * @file_name: optional file name associated with event + * @cookie: inotify rename cookie * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group @@ -151,7 +152,7 @@ struct fsnotify_ops { struct fsnotify_iter_info *iter_info); int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *file_name); + const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); -- cgit v1.2.3 From 50a4952fd67b7f7f551e82ac07c51c1a7a74d474 Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Thu, 15 Oct 2020 15:24:52 +0200 Subject: Updated locking documentation for transaction_t We used LockDoc to derive locking rules for each member of struct transaction_t. Based on those results, we extended the existing documentation by more members of struct transaction_t, and updated the existing documentation. Link: https://lore.kernel.org/r/10cfbef1-994c-c604-f8a6-b1042fcc622f@tu-dortmund.de Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 578ff196b3ce..d2a4860feb72 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -538,6 +538,7 @@ struct transaction_chp_stats_s { * The transaction keeps track of all of the buffers modified by a * running transaction, and all of the buffers committed but not yet * flushed to home for finished transactions. + * (Locking Documentation improved by LockDoc) */ /* @@ -658,12 +659,12 @@ struct transaction_s unsigned long t_start; /* - * When commit was requested + * When commit was requested [j_state_lock] */ unsigned long t_requested; /* - * Checkpointing stats [j_checkpoint_sem] + * Checkpointing stats [j_list_lock] */ struct transaction_chp_stats_s t_chp_stats; -- cgit v1.2.3 From 14026b94ccfe626e512bc9fa01e0e72ee75c7a98 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Aug 2020 17:19:32 +0000 Subject: signal: Add unsafe_put_compat_sigset() Implement 'unsafe' version of put_compat_sigset() For the bigendian, use unsafe_put_user() directly to avoid intermediate copy through the stack. For the littleendian, use a straight unsafe_copy_to_user(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/537c7082ee309a0bb9c67a50c5d9dd929aedb82d.1597770847.git.christophe.leroy@csgroup.eu --- include/linux/compat.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 14d514233e1d..400c0941c8af 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -442,6 +442,38 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, #endif } +#ifdef CONFIG_CPU_BIG_ENDIAN +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_put_user(__s->sig[3] >> 32, &__c->sig[7], label); \ + unsafe_put_user(__s->sig[3], &__c->sig[6], label); \ + fallthrough; \ + case 3: \ + unsafe_put_user(__s->sig[2] >> 32, &__c->sig[5], label); \ + unsafe_put_user(__s->sig[2], &__c->sig[4], label); \ + fallthrough; \ + case 2: \ + unsafe_put_user(__s->sig[1] >> 32, &__c->sig[3], label); \ + unsafe_put_user(__s->sig[1], &__c->sig[2], label); \ + fallthrough; \ + case 1: \ + unsafe_put_user(__s->sig[0] >> 32, &__c->sig[1], label); \ + unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ + } \ +} while (0) +#else +#define unsafe_put_compat_sigset(compat, set, label) do { \ + compat_sigset_t __user *__c = compat; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ +} while (0) +#endif + extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); -- cgit v1.2.3 From d4bff72c8401e6f56194ecf455db70ebc22929e2 Mon Sep 17 00:00:00 2001 From: Thomas Karlsson Date: Wed, 2 Dec 2020 19:49:58 +0100 Subject: macvlan: Support for high multicast packet rate Background: Broadcast and multicast packages are enqueued for later processing. This queue was previously hardcoded to 1000. This proved insufficient for handling very high packet rates. This resulted in packet drops for multicast. While at the same time unicast worked fine. The change: This patch make the queue length adjustable to accommodate for environments with very high multicast packet rate. But still keeps the default value of 1000 unless specified. The queue length is specified as a request per macvlan using the IFLA_MACVLAN_BC_QUEUE_LEN parameter. The actual used queue length will then be the maximum of any macvlan connected to the same port. The actual used queue length for the port can be retrieved (read only) by the IFLA_MACVLAN_BC_QUEUE_LEN_USED parameter for verification. This will be followed up by a patch to iproute2 in order to adjust the parameter from userspace. Signed-off-by: Thomas Karlsson Link: https://lore.kernel.org/r/dd4673b2-7eab-edda-6815-85c67ce87f63@paneda.se Signed-off-by: Jakub Kicinski --- include/linux/if_macvlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a367ead4bf4b..96556c64c95d 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -30,6 +30,7 @@ struct macvlan_dev { enum macvlan_mode mode; u16 flags; unsigned int macaddr_count; + u32 bc_queue_len_req; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif -- cgit v1.2.3 From d421e466c2373095f165ddd25cbabd6c5b077928 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 2 Dec 2020 20:39:46 -0800 Subject: net/mlx5: DR, Proper handling of unsupported Connect-X6DX SW steering STEs format for Connect-X5 and Connect-X6DX different. Currently, on Connext-X6DX the SW steering would break at some point when building STEs w/o giving a proper error message. Fix this by checking the STE format of the current device when initializing domain: add mlx5_ifc definitions for Connect-X6DX SW steering, read FW capability to get the current format version, and check this version when domain is being created. Fixes: 26d688e33f88 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a092346c7b2d..233352447b1a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1223,6 +1223,11 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) +enum { + MLX5_STEERING_FORMAT_CONNECTX_5 = 0, + MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x30]; u8 vhca_id[0x10]; @@ -1521,7 +1526,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 general_obj_types[0x40]; - u8 reserved_at_440[0x20]; + u8 reserved_at_440[0x4]; + u8 steering_format_version[0x4]; + u8 create_qp_start_hint[0x18]; u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; -- cgit v1.2.3 From 0fb6ee8d0b5e90b72f870f76debc8bd31a742014 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 24 Nov 2020 14:38:07 +0200 Subject: iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack Use a heap allocated memory for the SPI transfer buffer. Using stack memory can corrupt stack memory when using DMA on some systems. This change moves the buffer from the stack of the trigger handler call to the heap of the buffer of the state struct. The size increases takes into account the alignment for the timestamp, which is 8 bytes. The 'data' buffer is split into 'tx_buf' and 'rx_buf', to make a clearer separation of which part of the buffer should be used for TX & RX. Fixes: af3008485ea03 ("iio:adc: Add common code for ADI Sigma Delta devices") Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201124123807.19717-1-alexandru.ardelean@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index a3a838dcf8e4..7199280d89ca 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -79,8 +79,12 @@ struct ad_sigma_delta { /* * DMA (thus cache coherency maintenance) requires the * transfer buffers to live in their own cache lines. + * 'tx_buf' is up to 32 bits. + * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, + * rounded to 16 bytes to take into account padding. */ - uint8_t data[4] ____cacheline_aligned; + uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t rx_buf[16] __aligned(8); }; static inline int ad_sigma_delta_set_channel(struct ad_sigma_delta *sd, -- cgit v1.2.3 From eca8523a388f65cc0f515e3db4f3b027d7546532 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 20 Sep 2020 14:25:48 +0100 Subject: iio:trigger: rename try_reenable() to reenable() plus return void As we no longer support a try again if we cannot reenable the trigger rename the function to reflect this. Also we don't do anything with the value returned so stop it returning anything. For the few drivers that didn't already print an error message in this patch, add such a print. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen Acked-by: Linus Walleij Acked-by: Srinivas Pandruvada Cc: Linus Walleij Cc: Srinivas Pandruvada Cc: Christian Oder Cc: Eugen Hristev Cc: Nishant Malpani Cc: Daniel Baluta Link: https://lore.kernel.org/r/20200920132548.196452-3-jic23@kernel.org --- include/linux/iio/trigger.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index f930c4ccf378..055890b6ffcf 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -21,7 +21,7 @@ struct iio_trigger; /** * struct iio_trigger_ops - operations structure for an iio_trigger. * @set_trigger_state: switch on/off the trigger on demand - * @try_reenable: function to reenable the trigger when the + * @reenable: function to reenable the trigger when the * use count is zero (may be NULL) * @validate_device: function to validate the device when the * current trigger gets changed. @@ -31,7 +31,7 @@ struct iio_trigger; **/ struct iio_trigger_ops { int (*set_trigger_state)(struct iio_trigger *trig, bool state); - int (*try_reenable)(struct iio_trigger *trig); + void (*reenable)(struct iio_trigger *trig); int (*validate_device)(struct iio_trigger *trig, struct iio_dev *indio_dev); }; -- cgit v1.2.3 From 41dd9596d6b239a125c3d19f9d0ca90bdbfbf876 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Nov 2020 16:36:29 +0100 Subject: security: add const qualifier to struct sock in various places A followup change to tcp_request_sock_op would have to drop the 'const' qualifier from the 'route_req' function as the 'security_inet_conn_request' call is moved there - and that function expects a 'struct sock *'. However, it turns out its also possible to add a const qualifier to security_inet_conn_request instead. Signed-off-by: Florian Westphal Acked-by: James Morris Signed-off-by: Jakub Kicinski --- include/linux/lsm_audit.h | 2 +- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 28f23b341c1c..cd23355d2271 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -26,7 +26,7 @@ struct lsm_network_audit { int netif; - struct sock *sk; + const struct sock *sk; u16 family; __be16 dport; __be16 sport; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 32a940117e7a..acc0494cceba 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -301,7 +301,7 @@ LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk, struct sock *newsk) LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent) -LSM_HOOK(int, 0, inet_conn_request, struct sock *sk, struct sk_buff *skb, +LSM_HOOK(int, 0, inet_conn_request, const struct sock *sk, struct sk_buff *skb, struct request_sock *req) LSM_HOOK(void, LSM_RET_VOID, inet_csk_clone, struct sock *newsk, const struct request_sock *req) diff --git a/include/linux/security.h b/include/linux/security.h index bc2725491560..0df62735651b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1358,7 +1358,7 @@ void security_sk_clone(const struct sock *sk, struct sock *newsk); void security_sk_classify_flow(struct sock *sk, struct flowi *fl); void security_req_classify_flow(const struct request_sock *req, struct flowi *fl); void security_sock_graft(struct sock*sk, struct socket *parent); -int security_inet_conn_request(struct sock *sk, +int security_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req); @@ -1519,7 +1519,7 @@ static inline void security_sock_graft(struct sock *sk, struct socket *parent) { } -static inline int security_inet_conn_request(struct sock *sk, +static inline int security_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { return 0; -- cgit v1.2.3 From a15ac665b9e9c90b1557499f2a46c1e89d29154a Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 3 Dec 2020 22:35:11 +0100 Subject: vfio-mdev: Wire in a request handler for mdev parent While performing some destructive tests with vfio-ccw, where the paths to a device are forcible removed and thus the device itself is unreachable, it is rather easy to end up in an endless loop in vfio_del_group_dev() due to the lack of a request callback for the associated device. In this example, one MDEV (77c) is used by a guest, while another (77b) is not. The symptom is that the iommu is detached from the mdev for 77b, but not 77c, until that guest is shutdown: [ 238.794867] vfio_ccw 0.0.077b: MDEV: Unregistering [ 238.794996] vfio_mdev 11f2d2bc-4083-431d-a023-eff72715c4f0: Removing from iommu group 2 [ 238.795001] vfio_mdev 11f2d2bc-4083-431d-a023-eff72715c4f0: MDEV: detaching iommu [ 238.795036] vfio_ccw 0.0.077c: MDEV: Unregistering ...silence... Let's wire in the request call back to the mdev device, so that a device being physically removed from the host can be (gracefully?) handled by the parent device at the time the device is removed. Add a message when registering the device if a driver doesn't provide this callback, so a clue is given that this same loop may be encountered in a similar situation, and a message when this occurs instead of the awkward silence noted above. Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- include/linux/mdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 0ce30ca78db0..9004375c462e 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -72,6 +72,9 @@ struct device *mdev_get_iommu_device(struct device *dev); * @mmap: mmap callback * @mdev: mediated device structure * @vma: vma structure + * @request: request callback to release device + * @mdev: mediated device structure + * @count: request sequence number * Parent device that support mediated device should be registered with mdev * module with mdev_parent_ops structure. **/ @@ -92,6 +95,7 @@ struct mdev_parent_ops { long (*ioctl)(struct mdev_device *mdev, unsigned int cmd, unsigned long arg); int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma); + void (*request)(struct mdev_device *mdev, unsigned int count); }; /* interface for exporting mdev supported type attributes */ -- cgit v1.2.3 From 22dc4a0f5ed11b6dc8fd73a0892fa0ea1a4c3cdf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:29 -0800 Subject: bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead, wherever BTF type IDs are involved, also track the instance of struct btf that goes along with the type ID. This allows to gradually add support for kernel module BTFs and using/tracking module types across BPF helper calls and registers. This patch also renames btf_id() function to btf_obj_id() to minimize naming clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID. Also, altough btf_vmlinux can't get destructed and thus doesn't need refcounting, module BTFs need that, so apply BTF refcounting universally when BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This makes for simpler clean up code. Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF trampoline key to include BTF object ID. To differentiate that from target program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are not allowed to take full 32 bits, so there is no danger of confusing that bit with a valid BTF type ID. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org --- include/linux/bpf.h | 13 +++++++++---- include/linux/bpf_verifier.h | 28 +++++++++++++++++++++------- include/linux/btf.h | 5 ++++- 3 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9de5711b23f..d05e75ed8c1b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -421,7 +421,10 @@ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; union { int ctx_field_size; - u32 btf_id; + struct { + struct btf *btf; + u32 btf_id; + }; }; struct bpf_verifier_log *log; /* for verbose logs */ }; @@ -458,6 +461,7 @@ struct bpf_verifier_ops { struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, + const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); @@ -771,6 +775,7 @@ struct bpf_prog_aux { u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; + struct btf *attach_btf; const struct bpf_ctx_arg_aux *ctx_arg_info; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; @@ -1005,7 +1010,6 @@ struct bpf_event_entry { bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); -const char *kernel_type_name(u32 btf_type_id); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -1450,12 +1454,13 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); -int btf_struct_access(struct bpf_verifier_log *log, +int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); bool btf_struct_ids_match(struct bpf_verifier_log *log, - int off, u32 id, u32 need_type_id); + const struct btf *btf, u32 id, int off, + const struct btf *need_btf, u32 need_type_id); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 306869d4743b..e941fe1484e5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -5,6 +5,7 @@ #define _LINUX_BPF_VERIFIER_H 1 #include /* for enum bpf_reg_type */ +#include /* for struct btf and btf_id() */ #include /* for MAX_BPF_STACK */ #include @@ -43,6 +44,8 @@ enum bpf_reg_liveness { struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; + /* Fixed part of pointer offset, pointer types only */ + s32 off; union { /* valid when type == PTR_TO_PACKET */ int range; @@ -52,15 +55,20 @@ struct bpf_reg_state { */ struct bpf_map *map_ptr; - u32 btf_id; /* for PTR_TO_BTF_ID */ + /* for PTR_TO_BTF_ID */ + struct { + struct btf *btf; + u32 btf_id; + }; u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ /* Max size from any of the above. */ - unsigned long raw; + struct { + unsigned long raw1; + unsigned long raw2; + } raw; }; - /* Fixed part of pointer offset, pointer types only */ - s32 off; /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we @@ -311,7 +319,10 @@ struct bpf_insn_aux_data { struct { enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ union { - u32 btf_id; /* btf_id for struct typed var */ + struct { + struct btf *btf; + u32 btf_id; /* btf_id for struct typed var */ + }; u32 mem_size; /* mem_size for non-struct typed var */ }; } btf_var; @@ -459,9 +470,12 @@ int check_ctx_reg(struct bpf_verifier_env *env, /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, - u32 btf_id) + struct btf *btf, u32 btf_id) { - return tgt_prog ? (((u64)tgt_prog->aux->id) << 32 | btf_id) : btf_id; + if (tgt_prog) + return ((u64)tgt_prog->aux->id << 32) | btf_id; + else + return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id; } int bpf_check_attach_target(struct bpf_verifier_log *log, diff --git a/include/linux/btf.h b/include/linux/btf.h index 2bf641829664..fb608e4de076 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -18,6 +18,7 @@ struct btf_show; extern const struct file_operations btf_fops; +void btf_get(struct btf *btf); void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr); struct btf *btf_get_by_fd(int fd); @@ -88,7 +89,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, char *buf, int len, u64 flags); int btf_get_fd_by_id(u32 id); -u32 btf_id(const struct btf *btf); +u32 btf_obj_id(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); @@ -206,6 +207,8 @@ static inline const struct btf_var_secinfo *btf_type_var_secinfo( } #ifdef CONFIG_BPF_SYSCALL +struct bpf_prog; + const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); -- cgit v1.2.3 From 290248a5b7d829871b3ea3c62578613a580a1744 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 3 Dec 2020 12:46:30 -0800 Subject: bpf: Allow to specify kernel module BTFs when attaching BPF programs Add ability for user-space programs to specify non-vmlinux BTF when attaching BTF-powered BPF programs: raw_tp, fentry/fexit/fmod_ret, LSM, etc. For this, attach_prog_fd (now with the alias name attach_btf_obj_fd) should specify FD of a module or vmlinux BTF object. For backwards compatibility reasons, 0 denotes vmlinux BTF. Only kernel BTF (vmlinux or module) can be specified. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201203204634.1325171-11-andrii@kernel.org --- include/linux/btf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index fb608e4de076..4c200f5d242b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -90,6 +90,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); +bool btf_is_kernel(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -- cgit v1.2.3 From 6df3e14436f6ee254b1a4952d90ee8988be59c89 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 2 Dec 2020 18:41:02 +0000 Subject: psci: Add accessor for psci_0_1_function_ids Make it possible to retrieve a copy of the psci_0_1_function_ids struct. This is useful for KVM if it is configured to intercept host's PSCI SMCs. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20201202184122.26046-7-dbrazdil@google.com --- include/linux/psci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index 2a1bfb890e58..4ca0060a3fc4 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -34,6 +34,15 @@ struct psci_operations { extern struct psci_operations psci_ops; +struct psci_0_1_function_ids { + u32 cpu_suspend; + u32 cpu_on; + u32 cpu_off; + u32 migrate; +}; + +struct psci_0_1_function_ids get_psci_0_1_function_ids(void); + #if defined(CONFIG_ARM_PSCI_FW) int __init psci_dt_init(void); #else -- cgit v1.2.3 From 7de3697e9cbd4bd3d62bafa249d57990e1b8f294 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Wed, 2 Dec 2020 16:54:24 -0800 Subject: Add auxiliary bus support Add support for the Auxiliary Bus, auxiliary_device and auxiliary_driver. It enables drivers to create an auxiliary_device and bind an auxiliary_driver to it. The bus supports probe/remove shutdown and suspend/resume callbacks. Each auxiliary_device has a unique string based id; driver binds to an auxiliary_device based on this id through the bus. Co-developed-by: Kiran Patil Co-developed-by: Ranjani Sridharan Co-developed-by: Fred Oh Co-developed-by: Leon Romanovsky Signed-off-by: Kiran Patil Signed-off-by: Ranjani Sridharan Signed-off-by: Fred Oh Signed-off-by: Leon Romanovsky Signed-off-by: Dave Ertman Reviewed-by: Pierre-Louis Bossart Reviewed-by: Shiraz Saleem Reviewed-by: Parav Pandit Reviewed-by: Dan Williams Reviewed-by: Martin Habets Link: https://lore.kernel.org/r/20201113161859.1775473-2-david.m.ertman@intel.com Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/160695681289.505290.8978295443574440604.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 78 +++++++++++++++++++++++++++++++++++++++++ include/linux/mod_devicetable.h | 8 +++++ 2 files changed, 86 insertions(+) create mode 100644 include/linux/auxiliary_bus.h (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h new file mode 100644 index 000000000000..282fbf7bf9af --- /dev/null +++ b/include/linux/auxiliary_bus.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019-2020 Intel Corporation + * + * Please see Documentation/driver-api/auxiliary_bus.rst for more information. + */ + +#ifndef _AUXILIARY_BUS_H_ +#define _AUXILIARY_BUS_H_ + +#include +#include +#include + +struct auxiliary_device { + struct device dev; + const char *name; + u32 id; +}; + +struct auxiliary_driver { + int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id); + int (*remove)(struct auxiliary_device *auxdev); + void (*shutdown)(struct auxiliary_device *auxdev); + int (*suspend)(struct auxiliary_device *auxdev, pm_message_t state); + int (*resume)(struct auxiliary_device *auxdev); + const char *name; + struct device_driver driver; + const struct auxiliary_device_id *id_table; +}; + +static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev) +{ + return container_of(dev, struct auxiliary_device, dev); +} + +static inline struct auxiliary_driver *to_auxiliary_drv(struct device_driver *drv) +{ + return container_of(drv, struct auxiliary_driver, driver); +} + +int auxiliary_device_init(struct auxiliary_device *auxdev); +int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname); +#define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME) + +static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev) +{ + put_device(&auxdev->dev); +} + +static inline void auxiliary_device_delete(struct auxiliary_device *auxdev) +{ + device_del(&auxdev->dev); +} + +int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *owner, + const char *modname); +#define auxiliary_driver_register(auxdrv) \ + __auxiliary_driver_register(auxdrv, THIS_MODULE, KBUILD_MODNAME) + +void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); + +/** + * module_auxiliary_driver() - Helper macro for registering an auxiliary driver + * @__auxiliary_driver: auxiliary driver struct + * + * Helper macro for auxiliary drivers which do not do anything special in + * module init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_auxiliary_driver(__auxiliary_driver) \ + module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) + +struct auxiliary_device * +auxiliary_find_device(struct device *start, const void *data, + int (*match)(struct device *dev, const void *data)); + +#endif /* _AUXILIARY_BUS_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 5b08a473cdba..c425290b21e2 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -838,4 +838,12 @@ struct mhi_device_id { kernel_ulong_t driver_data; }; +#define AUXILIARY_NAME_SIZE 32 +#define AUXILIARY_MODULE_PREFIX "auxiliary:" + +struct auxiliary_device_id { + char name[AUXILIARY_NAME_SIZE]; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From 30ae3e13caeaa47884c222ebf5711ce27ed25f19 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 25 Nov 2020 22:29:59 +0100 Subject: mmc: tmio: set max_busy_timeout Set max_busy_timeouts for variants known to support the TOPxx bits in the SD_OPTION register. The timeout mechanism was running in the background but not yet properly handled in the driver. So, let the MMC core know when to not use R1B to avoid unhandled timeouts. My datasheets for older variants (tmio_mmc.c) suggest that they support it, too. However, actual bit descriptions are lacking, so I chose an opt-in approach. Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20201125213001.15003-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8ba042430d8e..27264fe4b3b9 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -55,7 +55,12 @@ */ #define TMIO_MMC_HAS_IDLE_WAIT BIT(4) -/* BIT(5) is unused */ +/* + * Use the busy timeout feature. Probably all TMIO versions support it. Yet, + * we don't have documentation for old variants, so we enable only known good + * variants with this flag. Can be removed once all variants are known good. + */ +#define TMIO_MMC_USE_BUSY_TIMEOUT BIT(5) /* * Some controllers have CMD12 automatically -- cgit v1.2.3 From 7bbb79ff5f7499e0c5d65987458410e8099207d8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 12:43:47 +0100 Subject: driver core: auxiliary bus: move slab.h from include file No need to include slab.h in include/linux/auxiliary_bus.h, as it is not needed there. Move it to drivers/base/auxiliary.c instead. Cc: Dan Williams Cc: Dave Ertman Cc: Fred Oh Cc: Kiran Patil Cc: Leon Romanovsky Cc: Martin Habets Cc: Parav Pandit Cc: Pierre-Louis Bossart Cc: Ranjani Sridharan Cc: Shiraz Saleem Link: https://lore.kernel.org/r/X8og8xi3WkoYXet9@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 282fbf7bf9af..3580743d0e8d 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -10,7 +10,6 @@ #include #include -#include struct auxiliary_device { struct device dev; -- cgit v1.2.3 From 8142a46c50d2dd8160c42284e1044eed3bec0d18 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 12:44:07 +0100 Subject: driver core: auxiliary bus: make remove function return void There's an effort to move the remove() callback in the driver core to not return an int, as nothing can be done if this function fails. To make that effort easier, make the aux bus remove function void to start with so that no users have to be changed sometime in the future. Cc: Dan Williams Cc: Dave Ertman Cc: Fred Oh Cc: Kiran Patil Cc: Leon Romanovsky Cc: Martin Habets Cc: Parav Pandit Cc: Pierre-Louis Bossart Cc: Ranjani Sridharan Cc: Shiraz Saleem Link: https://lore.kernel.org/r/X8ohB1ks1NK7kPop@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 3580743d0e8d..d67b17606210 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -19,7 +19,7 @@ struct auxiliary_device { struct auxiliary_driver { int (*probe)(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id); - int (*remove)(struct auxiliary_device *auxdev); + void (*remove)(struct auxiliary_device *auxdev); void (*shutdown)(struct auxiliary_device *auxdev); int (*suspend)(struct auxiliary_device *auxdev, pm_message_t state); int (*resume)(struct auxiliary_device *auxdev); -- cgit v1.2.3 From 0d2bf11a6b3e275a526b8d42d8d4a3a6067cf953 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 12:49:28 +0100 Subject: driver core: auxiliary bus: minor coding style tweaks For some reason, the original aux bus patch had some really long lines in a few places, probably due to it being a very long-lived patch in development by many different people. Fix that up so that the two files all have the same length lines and function formatting styles. Cc: Dan Williams Cc: Dave Ertman Cc: Fred Oh Cc: Kiran Patil Cc: Leon Romanovsky Cc: Martin Habets Cc: Parav Pandit Cc: Pierre-Louis Bossart Cc: Ranjani Sridharan Cc: Shiraz Saleem Link: https://lore.kernel.org/r/X8oiSFTpYHw1xE/o@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index d67b17606210..fc51d45f106b 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -70,8 +70,8 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) -struct auxiliary_device * -auxiliary_find_device(struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); +struct auxiliary_device *auxiliary_find_device(struct device *start, + const void *data, + int (*match)(struct device *dev, const void *data)); #endif /* _AUXILIARY_BUS_H_ */ -- cgit v1.2.3 From 17a7612b99e66d2539341ab4f888f970c2c7f76d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 4 Oct 2020 14:30:58 +0300 Subject: net/mlx5_core: Clean driver version and name Remove exposed driver version as it was done in other drivers, so module version will work correctly by displaying the kernel version for which it is compiled. And move mlx5_core module name to general include, so auxiliary drivers will be able to use it as a basis for a name in their device ID tables. Reviewed-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0f23e1ed5e71..5f04495c33d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -56,6 +56,8 @@ #include #include +#define MLX5_ADEV_NAME "mlx5_core" + enum { MLX5_BOARD_ID_LEN = 64, }; -- cgit v1.2.3 From 0aae392bea4da1a2a9f2e22683c0fa751dc07333 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 6 Oct 2020 12:34:25 +0300 Subject: vdpa/mlx5: Make hardware definitions visible to all mlx5 devices Move mlx5_vdpa IFC header file to the general include folder, so mlx5_core will be able to reuse it to check if VDPA is supported prior to creating an auxiliary device. As part of this move, update the header file name to mlx5 general naming scheme. Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc_vdpa.h | 166 +++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 include/linux/mlx5/mlx5_ifc_vdpa.h (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h new file mode 100644 index 000000000000..98b56b75c625 --- /dev/null +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_IFC_VDPA_H_ +#define __MLX5_IFC_VDPA_H_ + +enum { + MLX5_VIRTIO_Q_EVENT_MODE_NO_MSIX_MODE = 0x0, + MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE = 0x1, + MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, +}; + +struct mlx5_ifc_virtio_q_bits { + u8 virtio_q_type[0x8]; + u8 reserved_at_8[0x5]; + u8 event_mode[0x3]; + u8 queue_index[0x10]; + + u8 full_emulation[0x1]; + u8 virtio_version_1_0[0x1]; + u8 reserved_at_22[0x2]; + u8 offload_type[0x4]; + u8 event_qpn_or_msix[0x18]; + + u8 doorbell_stride_index[0x10]; + u8 queue_size[0x10]; + + u8 device_emulation_id[0x20]; + + u8 desc_addr[0x40]; + + u8 used_addr[0x40]; + + u8 available_addr[0x40]; + + u8 virtio_q_mkey[0x20]; + + u8 max_tunnel_desc[0x10]; + u8 reserved_at_170[0x8]; + u8 error_type[0x8]; + + u8 umem_1_id[0x20]; + + u8 umem_1_size[0x20]; + + u8 umem_1_offset[0x40]; + + u8 umem_2_id[0x20]; + + u8 umem_2_size[0x20]; + + u8 umem_2_offset[0x40]; + + u8 umem_3_id[0x20]; + + u8 umem_3_size[0x20]; + + u8 umem_3_offset[0x40]; + + u8 counter_set_id[0x20]; + + u8 reserved_at_320[0x8]; + u8 pd[0x18]; + + u8 reserved_at_340[0xc0]; +}; + +struct mlx5_ifc_virtio_net_q_object_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x20]; + + u8 vhca_id[0x10]; + u8 reserved_at_70[0x10]; + + u8 queue_feature_bit_mask_12_3[0xa]; + u8 dirty_bitmap_dump_enable[0x1]; + u8 vhost_log_page[0x5]; + u8 reserved_at_90[0xc]; + u8 state[0x4]; + + u8 reserved_at_a0[0x5]; + u8 queue_feature_bit_mask_2_0[0x3]; + u8 tisn_or_qpn[0x18]; + + u8 dirty_bitmap_mkey[0x20]; + + u8 dirty_bitmap_size[0x20]; + + u8 dirty_bitmap_addr[0x40]; + + u8 hw_available_index[0x10]; + u8 hw_used_index[0x10]; + + u8 reserved_at_160[0xa0]; + + struct mlx5_ifc_virtio_q_bits virtio_q_context; +}; + +struct mlx5_ifc_create_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_create_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +enum { + MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, +}; + +enum { + MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT = 0x0, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY = 0x1, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND = 0x2, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, +}; + +enum { + MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, + MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, +}; + +struct mlx5_ifc_modify_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_modify_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +#endif /* __MLX5_IFC_VDPA_H_ */ -- cgit v1.2.3 From a925b5e309c9b998658a6a94dbb53154ea901299 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 8 Oct 2020 16:06:37 +0300 Subject: net/mlx5: Register mlx5 devices to auxiliary virtual bus Create auxiliary devices under new virtual bus. This will replace the custom-made mlx5 ->add()/->remove() interfaces and next patches will fill the missing callback and remove the old interface logic. The attachment of auxiliary drivers to the devices is possible in 1-to-1 manner only and it requires us to create device for every protocol, so that device (module) will be able to connect to it. System with 2 IB and 1 RoCE cards: [leonro@vm ~]$ lspci |grep nox 00:09.0 Ethernet controller: Mellanox Technologies MT27800 Family [ConnectX-5] 00:0a.0 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6] 00:0b.0 Ethernet controller: Mellanox Technologies MT2910 Family [ConnectX-7] [leonro@vm ~]$ ls -l /sys/bus/auxiliary/devices/ mlx5_core.eth.2 -> ../../../devices/pci0000:00/0000:00:0b.0/mlx5_core.eth.2 mlx5_core.rdma.0 -> ../../../devices/pci0000:00/0000:00:09.0/mlx5_core.rdma.0 mlx5_core.rdma.1 -> ../../../devices/pci0000:00/0000:00:0a.0/mlx5_core.rdma.1 mlx5_core.rdma.2 -> ../../../devices/pci0000:00/0000:00:0b.0/mlx5_core.rdma.2 mlx5_core.vdpa.1 -> ../../../devices/pci0000:00/0000:00:0a.0/mlx5_core.vdpa.1 mlx5_core.vdpa.2 -> ../../../devices/pci0000:00/0000:00:0b.0/mlx5_core.vdpa.2 [leonro@vm ~]$ rdma dev 0: ibp0s9: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3455 sys_image_guid 5254:00c0:fe12:3455 1: ibp0s10: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3456 sys_image_guid 5254:00c0:fe12:3456 2: rdmap0s11: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3457 sys_image_guid 5254:00c0:fe12:3457 System with RoCE SR-IOV card with 4 VFs: [leonro@vm ~]$ lspci |grep nox 01:00.0 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6] 01:00.1 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] 01:00.2 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] 01:00.3 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] 01:00.4 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6 Virtual Function] [leonro@vm ~]$ ls -l /sys/bus/auxiliary/devices/ mlx5_core.eth.0 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.0/mlx5_core.eth.0 mlx5_core.eth.1 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.1/mlx5_core.eth.1 mlx5_core.eth.2 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.2/mlx5_core.eth.2 mlx5_core.eth.3 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.3/mlx5_core.eth.3 mlx5_core.eth.4 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.4/mlx5_core.eth.4 mlx5_core.rdma.0 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.0/mlx5_core.rdma.0 mlx5_core.rdma.1 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.1/mlx5_core.rdma.1 mlx5_core.rdma.2 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.2/mlx5_core.rdma.2 mlx5_core.rdma.3 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.3/mlx5_core.rdma.3 mlx5_core.rdma.4 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.4/mlx5_core.rdma.4 mlx5_core.vdpa.1 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.1/mlx5_core.vdpa.1 mlx5_core.vdpa.2 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.2/mlx5_core.vdpa.2 mlx5_core.vdpa.3 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.3/mlx5_core.vdpa.3 mlx5_core.vdpa.4 -> ../../../devices/pci0000:00/0000:00:09.0/0000:01:00.4/mlx5_core.vdpa.4 [leonro@vm ~]$ rdma dev 0: rocep1s0f0: node_type ca fw 4.6.9999 node_guid 5254:00c0:fe12:3455 sys_image_guid 5254:00c0:fe12:3455 1: rocep1s0f0v0: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3456 2: rocep1s0f0v1: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3457 3: rocep1s0f0v2: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3458 4: rocep1s0f0v3: node_type ca fw 4.6.9999 node_guid 0000:0000:0000:0000 sys_image_guid 5254:00c0:fe12:3459 Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5f04495c33d7..e31c72693bcf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -536,6 +537,17 @@ struct mlx5_core_roce { struct mlx5_flow_handle *allow_rule; }; +enum { + MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0, + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1, +}; + +struct mlx5_adev { + struct auxiliary_device adev; + struct mlx5_core_dev *mdev; + int idx; +}; + struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ struct mlx5_irq_table *irq_table; @@ -573,6 +585,8 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_adev **adev; + int adev_idx; struct mlx5_events *events; struct mlx5_flow_steering *steering; @@ -580,6 +594,7 @@ struct mlx5_priv { struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; struct mlx5_lag *lag; + u32 flags; struct mlx5_devcom *devcom; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; @@ -1062,9 +1077,14 @@ enum { }; enum { - MLX5_INTERFACE_PROTOCOL_IB = 0, - MLX5_INTERFACE_PROTOCOL_ETH = 1, - MLX5_INTERFACE_PROTOCOL_VDPA = 2, + MLX5_INTERFACE_PROTOCOL_ETH_REP, + MLX5_INTERFACE_PROTOCOL_ETH, + + MLX5_INTERFACE_PROTOCOL_IB_REP, + MLX5_INTERFACE_PROTOCOL_MPIB, + MLX5_INTERFACE_PROTOCOL_IB, + + MLX5_INTERFACE_PROTOCOL_VDPA, }; struct mlx5_interface { -- cgit v1.2.3 From 62dcd9c59f324e484c1d655884e0101a988f6671 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:13 +0100 Subject: earlycon: simplify earlycon-table implementation Instead of using the array-of-pointers trick to avoid having gcc mess up the earlycon array stride, specify type alignment when declaring entries to prevent gcc from increasing alignment. This is essentially an alternative (one-line) fix to the problem addressed by commit dd709e72cb93 ("earlycon: Use a pointer table to fix __earlycon_table stride"). gcc can increase the alignment of larger objects with static extent as an optimisation, but this can be suppressed by using the aligned attribute when declaring variables. Note that we have been relying on this behaviour for kernel parameters for 16 years and it indeed hasn't changed since the introduction of the aligned attribute in gcc-3.1. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201123102319.8090-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index ff63c2963359..3e32b788c28d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -357,8 +357,8 @@ struct earlycon_id { int (*setup)(struct earlycon_device *, const char *options); }; -extern const struct earlycon_id *__earlycon_table[]; -extern const struct earlycon_id *__earlycon_table_end[]; +extern const struct earlycon_id __earlycon_table[]; +extern const struct earlycon_id __earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used @@ -366,19 +366,13 @@ extern const struct earlycon_id *__earlycon_table_end[]; #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif -#define _OF_EARLYCON_DECLARE(_name, compat, fn, unique_id) \ - static const struct earlycon_id unique_id \ - EARLYCON_USED_OR_UNUSED __initconst \ +#define OF_EARLYCON_DECLARE(_name, compat, fn) \ + static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ + EARLYCON_USED_OR_UNUSED __section("__earlycon_table") \ + __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ - .setup = fn }; \ - static const struct earlycon_id EARLYCON_USED_OR_UNUSED \ - __section("__earlycon_table") \ - * const __PASTE(__p, unique_id) = &unique_id - -#define OF_EARLYCON_DECLARE(_name, compat, fn) \ - _OF_EARLYCON_DECLARE(_name, compat, fn, \ - __UNIQUE_ID(__earlycon_##_name)) + .setup = fn }; #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) -- cgit v1.2.3 From 5812b32e01c6d86ba7a84110702b46d8a8531fe9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 23 Nov 2020 11:23:12 +0100 Subject: of: fix linker-section match-table corruption Specify type alignment when declaring linker-section match-table entries to prevent gcc from increasing alignment and corrupting the various tables with padding (e.g. timers, irqchips, clocks, reserved memory). This is specifically needed on x86 where gcc (typically) aligns larger objects like struct of_device_id with static extent on 32-byte boundaries which at best prevents matching on anything but the first entry. Specifying alignment when declaring variables suppresses this optimisation. Here's a 64-bit example where all entries are corrupt as 16 bytes of padding has been inserted before the first entry: ffffffff8266b4b0 D __clk_of_table ffffffff8266b4c0 d __of_table_fixed_factor_clk ffffffff8266b5a0 d __of_table_fixed_clk ffffffff8266b680 d __clk_of_table_sentinel And here's a 32-bit example where the 8-byte-aligned table happens to be placed on a 32-byte boundary so that all but the first entry are corrupt due to the 28 bytes of padding inserted between entries: 812b3ec0 D __irqchip_of_table 812b3ec0 d __of_table_irqchip1 812b3fa0 d __of_table_irqchip2 812b4080 d __of_table_irqchip3 812b4160 d irqchip_of_match_end Verified on x86 using gcc-9.3 and gcc-4.9 (which uses 64-byte alignment), and on arm using gcc-7.2. Note that there are no in-tree users of these tables on x86 currently (even if they are included in the image). Fixes: 54196ccbe0ba ("of: consolidate linker section OF match table declarations") Fixes: f6e916b82022 ("irqchip: add basic infrastructure") Cc: stable # 3.9 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201123102319.8090-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/of.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 5d51891cbf1a..af655d264f10 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1300,6 +1300,7 @@ static inline int of_get_available_child_count(const struct device_node *np) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __used __section("__" #table "_of_table") \ + __aligned(__alignof__(struct of_device_id)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #else -- cgit v1.2.3 From e0efb3168d34dc8c8c72718672b8902e40efff8f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 3 Dec 2020 03:03:31 +0100 Subject: tty: Remove dead termiox code set_termiox() and the TCGETX handler bail out with -EINVAL immediately if ->termiox is NULL, but there are no code paths that can set ->termiox to a non-NULL pointer; and no such code paths seem to have existed since the termiox mechanism was introduced back in commit 1d65b4a088de ("tty: Add termiox") in v2.6.28. Similarly, no driver actually implements .set_termiox; and it looks like no driver ever has. Delete this dead code; but leave the definition of struct termiox in the UAPI headers intact. Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20201203020331.2394754-1-jannh@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 - include/linux/tty_driver.h | 9 --------- 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 10212c6e4345..67c7a07c8083 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -303,7 +303,6 @@ struct tty_struct { spinlock_t flow_lock; /* Termios values are protected by the termios rwsem */ struct ktermios termios, termios_locked; - struct termiox *termiox; /* May be NULL for unsupported */ char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ struct pid *session; diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 358446247ccd..61c3372d3f32 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -224,14 +224,6 @@ * line). See tty_do_resize() if you need to wrap the standard method * in your own logic - the usual case. * - * void (*set_termiox)(struct tty_struct *tty, struct termiox *new); - * - * Called when the device receives a termiox based ioctl. Passes down - * the requested data from user space. This method will not be invoked - * unless the tty also has a valid tty->termiox pointer. - * - * Optional: Called under the termios lock - * * int (*get_icount)(struct tty_struct *tty, struct serial_icounter *icount); * * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel @@ -285,7 +277,6 @@ struct tty_operations { int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); int (*resize)(struct tty_struct *tty, struct winsize *ws); - int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew); int (*get_icount)(struct tty_struct *tty, struct serial_icounter_struct *icount); int (*get_serial)(struct tty_struct *tty, struct serial_struct *p); -- cgit v1.2.3 From c8bcd9c5be24fb9e6132e97da5a35e55a83e36b9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 3 Dec 2020 02:25:05 +0100 Subject: tty: Fix ->session locking Currently, locking of ->session is very inconsistent; most places protect it using the legacy tty mutex, but disassociate_ctty(), __do_SAK(), tiocspgrp() and tiocgsid() don't. Two of the writers hold the ctrl_lock (because they already need it for ->pgrp), but __proc_set_tty() doesn't do that yet. On a PREEMPT=y system, an unprivileged user can theoretically abuse this broken locking to read 4 bytes of freed memory via TIOCGSID if tiocgsid() is preempted long enough at the right point. (Other things might also go wrong, especially if root-only ioctls are involved; I'm not sure about that.) Change the locking on ->session such that: - tty_lock() is held by all writers: By making disassociate_ctty() hold it. This should be fine because the same lock can already be taken through the call to tty_vhangup_session(). The tricky part is that we need to shorten the area covered by siglock to be able to take tty_lock() without ugly retry logic; as far as I can tell, this should be fine, since nothing in the signal_struct is touched in the `if (tty)` branch. - ctrl_lock is held by all writers: By changing __proc_set_tty() to hold the lock a little longer. - All readers that aren't holding tty_lock() hold ctrl_lock: By adding locking to tiocgsid() and __do_SAK(), and expanding the area covered by ctrl_lock in tiocspgrp(). Cc: stable@kernel.org Signed-off-by: Jann Horn Reviewed-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index a99e9b8e4e31..eb33d948788c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -306,6 +306,10 @@ struct tty_struct { struct termiox *termiox; /* May be NULL for unsupported */ char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ + /* + * Writes protected by both ctrl lock and legacy mutex, readers must use + * at least one of them. + */ struct pid *session; unsigned long flags; int count; -- cgit v1.2.3 From a54895fa057c67700270777f7661d8d3c7fda88a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2020 17:21:39 +0100 Subject: block: remove the request_queue to argument request based tracepoints The request_queue can trivially be derived from the request. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b6ff5902edc..05556573b896 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -75,8 +75,7 @@ static inline bool blk_trace_note_message_enabled(struct request_queue *q) return ret; } -extern void blk_add_driver_data(struct request_queue *q, struct request *rq, - void *data, size_t len); +extern void blk_add_driver_data(struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg); @@ -90,7 +89,7 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) -- cgit v1.2.3 From c9d659b60770db94b898f94947192a94bbf95c5c Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 20 Nov 2020 16:10:23 -0800 Subject: PCI/ERR: Bind RCEC devices to the Root Port driver If a Root Complex Integrated Endpoint (RCiEP) is implemented, it may signal errors through a Root Complex Event Collector (RCEC). Each RCiEP must be associated with no more than one RCEC. For an RCEC (which is technically not a Bridge), error messages "received" from associated RCiEPs must be enabled for "transmission" in order to cause a System Error via the Root Control register or (when the Advanced Error Reporting Capability is present) reporting via the Root Error Command register and logging in the Root Error Status register and Error Source Identification register. Given the commonality with Root Ports and the need to also support AER and PME services for RCECs, extend the Root Port driver to support RCEC devices by adding the RCEC Class ID to the driver structure. Co-developed-by: Sean V Kelley Link: https://lore.kernel.org/r/20201121001036.8560-3-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Sean V Kelley Signed-off-by: Qiuxu Zhuo Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1ab1e24bcbce..d8156a5dbee8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -81,6 +81,7 @@ #define PCI_CLASS_SYSTEM_RTC 0x0803 #define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 #define PCI_CLASS_SYSTEM_SDHCI 0x0805 +#define PCI_CLASS_SYSTEM_RCEC 0x0807 #define PCI_CLASS_SYSTEM_OTHER 0x0880 #define PCI_BASE_CLASS_INPUT 0x09 -- cgit v1.2.3 From 90655631988f8f501529e6de5f13614389717ead Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:24 -0800 Subject: PCI/ERR: Cache RCEC EA Capability offset in pci_init_capabilities() Extend support for Root Complex Event Collectors by decoding and caching the RCEC Endpoint Association Extended Capabilities when enumerating. Use that cached information for later error source reporting. See PCIe r5.0, sec 7.9.10. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-4-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..f8c927fd0602 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -304,6 +304,7 @@ struct pcie_link_state; struct pci_vpd; struct pci_sriov; struct pci_p2pdma; +struct rcec_ea; /* The pci_dev structure describes PCI devices */ struct pci_dev { @@ -326,6 +327,9 @@ struct pci_dev { #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ struct aer_stats *aer_stats; /* AER stats for this device */ +#endif +#ifdef CONFIG_PCIEPORTBUS + struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ -- cgit v1.2.3 From 3ee16db390b42b8a21f2ad2ea2518f3469c6e532 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 30 Nov 2020 10:57:43 -0500 Subject: dm: fix IO splitting Commit 882ec4e609c1 ("dm table: stack 'chunk_sectors' limit to account for target-specific splitting") caused a couple regressions: 1) Using lcm_not_zero() when stacking chunk_sectors was a bug because chunk_sectors must reflect the most limited of all devices in the IO stack. 2) DM targets that set max_io_len but that do _not_ provide an .iterate_devices method no longer had there IO split properly. And commit 5091cdec56fa ("dm: change max_io_len() to use blk_max_size_offset()") also caused a regression where DM no longer supported varied (per target) IO splitting. The implication being the potential for severely reduced performance for IO stacks that use a DM target like dm-cache to hide performance limitations of a slower device (e.g. one that requires 4K IO splitting). Coming full circle: Fix all these issues by discontinuing stacking chunk_sectors up using ti->max_io_len in dm_calculate_queue_limits(), add optional chunk_sectors override argument to blk_max_size_offset() and update DM's max_io_len() to pass ti->max_io_len to its blk_max_size_offset() call. Passing in an optional chunk_sectors override to blk_max_size_offset() allows for code reuse of block's centralized calculation for max IO size based on provided offset and split boundary. Fixes: 882ec4e609c1 ("dm table: stack 'chunk_sectors' limit to account for target-specific splitting") Fixes: 5091cdec56fa ("dm: change max_io_len() to use blk_max_size_offset()") Cc: stable@vger.kernel.org Reported-by: John Dorminy Reported-by: Bruce Johnston Reported-by: Kirill Tkhai Reviewed-by: John Dorminy Signed-off-by: Mike Snitzer Reviewed-by: Jens Axboe --- include/linux/blkdev.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..24ae504cf77d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1073,11 +1073,12 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, * file system requests. */ static inline unsigned int blk_max_size_offset(struct request_queue *q, - sector_t offset) + sector_t offset, + unsigned int chunk_sectors) { - unsigned int chunk_sectors = q->limits.chunk_sectors; - - if (!chunk_sectors) + if (!chunk_sectors && q->limits.chunk_sectors) + chunk_sectors = q->limits.chunk_sectors; + else return q->limits.max_sectors; if (likely(is_power_of_2(chunk_sectors))) @@ -1101,7 +1102,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, req_op(rq) == REQ_OP_SECURE_ERASE) return blk_queue_get_max_sectors(q, req_op(rq)); - return min(blk_max_size_offset(q, offset), + return min(blk_max_size_offset(q, offset, 0), blk_queue_get_max_sectors(q, req_op(rq))); } -- cgit v1.2.3 From f646c2a0a6685a8a30a150509b112c911b8feff3 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Sun, 29 Nov 2020 22:16:26 +0530 Subject: PCI: Return u8 from pci_find_capability() and similar PCI Capabilities are linked in a list that must appear in the first 256 bytes of config space. Each capabilities list pointer is 8 bits. Change the return type of pci_find_capability() and supporting functions from int to u8 to match the specification. [bhelgaas: change other related interfaces, fix HyperTransport typos] Link: https://lore.kernel.org/r/20201129164626.12887-1-puranjay12@gmail.com Signed-off-by: Puranjay Mohan Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e007bc3e8b6e..e615f8abdd79 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1064,12 +1064,13 @@ void pci_sort_breadthfirst(void); /* Generic PCI functions exported to card drivers */ -int pci_find_capability(struct pci_dev *dev, int cap); -int pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); +u8 pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +u8 pci_find_capability(struct pci_dev *dev, int cap); +u8 pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); +u8 pci_find_ht_capability(struct pci_dev *dev, int ht_cap); +u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap); int pci_find_ext_capability(struct pci_dev *dev, int cap); int pci_find_next_ext_capability(struct pci_dev *dev, int pos, int cap); -int pci_find_ht_capability(struct pci_dev *dev, int ht_cap); -int pci_find_next_ht_capability(struct pci_dev *dev, int pos, int ht_cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); u64 pci_get_dsn(struct pci_dev *dev); @@ -1280,7 +1281,6 @@ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); /* Functions for PCI Hotplug drivers to use */ -int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge); unsigned int pci_rescan_bus(struct pci_bus *bus); void pci_lock_rescan_remove(void); @@ -1720,7 +1720,7 @@ static inline int __pci_register_driver(struct pci_driver *drv, static inline int pci_register_driver(struct pci_driver *drv) { return 0; } static inline void pci_unregister_driver(struct pci_driver *drv) { } -static inline int pci_find_capability(struct pci_dev *dev, int cap) +static inline u8 pci_find_capability(struct pci_dev *dev, int cap) { return 0; } static inline int pci_find_next_capability(struct pci_dev *dev, u8 post, int cap) -- cgit v1.2.3 From ee8b1c478a9fbce9c64151ee561c124c4dcd66be Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 4 Dec 2020 15:14:07 -0600 Subject: PCI: Return u16 from pci_find_ext_capability() and similar PCI Express Extended Capabilities are in config space between offsets 256 and 4K. These offsets all fit in 16 bits. Change the return type of pci_find_ext_capability() and supporting functions from int to u16 to match the specification. Many callers use "int", which is fine, but there's no need to store more than a u16. Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e615f8abdd79..441e5753da0c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -381,7 +381,7 @@ struct pci_dev { struct pcie_link_state *link_state; /* ASPM link state */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ - int l1ss; /* L1SS Capability pointer */ + u16 l1ss; /* L1SS Capability pointer */ #endif unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ @@ -1069,8 +1069,8 @@ u8 pci_find_capability(struct pci_dev *dev, int cap); u8 pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); u8 pci_find_ht_capability(struct pci_dev *dev, int ht_cap); u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap); -int pci_find_ext_capability(struct pci_dev *dev, int cap); -int pci_find_next_ext_capability(struct pci_dev *dev, int pos, int cap); +u16 pci_find_ext_capability(struct pci_dev *dev, int cap); +u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); u64 pci_get_dsn(struct pci_dev *dev); -- cgit v1.2.3 From dba4a9256bb4d78ef89aaad5f49787aa27fcd5b4 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Fri, 4 Dec 2020 12:36:04 +0100 Subject: net: Remove the err argument from sock_from_file Currently, the sock_from_file prototype takes an "err" pointer that is either not set or set to -ENOTSOCK IFF the returned socket is NULL. This makes the error redundant and it is ignored by a few callers. This patch simplifies the API by letting callers deduce the error based on whether the returned socket is NULL or not. Suggested-by: Al Viro Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Reviewed-by: KP Singh Link: https://lore.kernel.org/bpf/20201204113609.1850150-1-revest@google.com --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 0dcd51feef02..9e2324efc26a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -240,7 +240,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); struct socket *sockfd_lookup(int fd, int *err); -struct socket *sock_from_file(struct file *file, int *err); +struct socket *sock_from_file(struct file *file); #define sockfd_put(sock) fput(sock->file) int net_ratelimit(void); -- cgit v1.2.3 From 65f33b35722952fa076811d5686bfd8a611a80fa Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 4 Dec 2020 17:21:03 -0500 Subject: block: fix incorrect branching in blk_max_size_offset() If non-zero 'chunk_sectors' is passed in to blk_max_size_offset() that override will be incorrectly ignored. Old blk_max_size_offset() branching, prior to commit 3ee16db390b4, must be used only if passed 'chunk_sectors' override is zero. Fixes: 3ee16db390b4 ("dm: fix IO splitting") Cc: stable@vger.kernel.org # 5.9 Reported-by: John Dorminy Signed-off-by: Mike Snitzer --- include/linux/blkdev.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24ae504cf77d..033eb5f73b65 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1076,10 +1076,12 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, sector_t offset, unsigned int chunk_sectors) { - if (!chunk_sectors && q->limits.chunk_sectors) - chunk_sectors = q->limits.chunk_sectors; - else - return q->limits.max_sectors; + if (!chunk_sectors) { + if (q->limits.chunk_sectors) + chunk_sectors = q->limits.chunk_sectors; + else + return q->limits.max_sectors; + } if (likely(is_power_of_2(chunk_sectors))) chunk_sectors -= offset & (chunk_sectors - 1); -- cgit v1.2.3 From 99efde6c9bb7b42eac0459876bf964fe08e5cef9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 25 Nov 2020 12:07:33 +0300 Subject: PCI/PM: Rename pci_wakeup_bus() to pci_resume_bus() A "wakeup" is a signal from a device telling the system that the device or the whole system should be awakened and made active. PCI devices are made active by "resuming" them. pci_wakeup_bus() is not involved with the wakeup signal; it *resumes* devices on a bus (possibly in response to a wakeup signal, but that's at a higher level). Rename pci_wakeup_bus() to pci_resume_bus() to better reflect what it does. No functional change intended. [bhelgaas: commit log, reorder before removal of pci_wakeup_event()] Link: https://lore.kernel.org/r/20201125090733.77782-2-mika.westerberg@linux.intel.com Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..9256ef2e4327 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1271,7 +1271,7 @@ bool pci_dev_run_wake(struct pci_dev *dev); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); bool pcie_relaxed_ordering_enabled(struct pci_dev *dev); -void pci_wakeup_bus(struct pci_bus *bus); +void pci_resume_bus(struct pci_bus *bus); void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state); /* For use by arch with custom probe code */ -- cgit v1.2.3 From ed9b25d1970a4787ac6a39c2091e63b127ecbfc1 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Sun, 15 Nov 2020 21:55:31 -0600 Subject: [SECURITY] fix namespaced fscaps when !CONFIG_SECURITY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Namespaced file capabilities were introduced in 8db6c34f1dbc . When userspace reads an xattr for a namespaced capability, a virtualized representation of it is returned if the caller is in a user namespace owned by the capability's owning rootid. The function which performs this virtualization was not hooked up if CONFIG_SECURITY=n. Therefore in that case the original xattr was shown instead of the virtualized one. To test this using libcap-bin (*1), $ v=$(mktemp) $ unshare -Ur setcap cap_sys_admin-eip $v $ unshare -Ur setcap -v cap_sys_admin-eip $v /tmp/tmp.lSiIFRvt8Y: OK "setcap -v" verifies the values instead of setting them, and will check whether the rootid value is set. Therefore, with this bug un-fixed, and with CONFIG_SECURITY=n, setcap -v will fail: $ v=$(mktemp) $ unshare -Ur setcap cap_sys_admin=eip $v $ unshare -Ur setcap -v cap_sys_admin=eip $v nsowner[got=1000, want=0],/tmp/tmp.HHDiOOl9fY differs in [] Fix this bug by calling cap_inode_getsecurity() in security_inode_getsecurity() instead of returning -EOPNOTSUPP, when CONFIG_SECURITY=n. *1 - note, if libcap is too old for getcap to have the '-n' option, then use verify-caps instead. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=209689 Cc: Hervé Guillemet Acked-by: Casey Schaufler Signed-off-by: Serge Hallyn Signed-off-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 0a0a03b36a3b..2befc0a25eb3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -864,7 +864,7 @@ static inline int security_inode_killpriv(struct dentry *dentry) static inline int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc) { - return -EOPNOTSUPP; + return cap_inode_getsecurity(inode, name, buffer, alloc); } static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) -- cgit v1.2.3 From 507b460f814458605c47b0ed03c11e49a712fc08 Mon Sep 17 00:00:00 2001 From: Sean V Kelley Date: Fri, 20 Nov 2020 16:10:32 -0800 Subject: PCI/ERR: Add pcie_link_rcec() to associate RCiEPs A Root Complex Event Collector terminates error and PME messages from associated RCiEPs. Use the RCEC Endpoint Association Extended Capability to identify associated RCiEPs. Link the associated RCiEPs as the RCECs are enumerated. Co-developed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20201121001036.8560-12-sean.v.kelley@intel.com Tested-by: Jonathan Cameron # non-native/no RCEC Signed-off-by: Qiuxu Zhuo Signed-off-by: Sean V Kelley Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f8c927fd0602..7c7d2d23e8a3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -330,6 +330,7 @@ struct pci_dev { #endif #ifdef CONFIG_PCIEPORTBUS struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ + struct pci_dev *rcec; /* Associated RCEC device */ #endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ -- cgit v1.2.3 From 601c10c89cbb32b9123d8716d193e6d1a8e5300d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Oct 2020 11:13:56 +0300 Subject: net/mlx5: Delete custom device management logic After conversion to use auxiliary bus, all custom device management is not needed anymore, delete it. Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e31c72693bcf..54299305a2c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1076,28 +1076,6 @@ enum { MAX_MR_CACHE_ENTRIES }; -enum { - MLX5_INTERFACE_PROTOCOL_ETH_REP, - MLX5_INTERFACE_PROTOCOL_ETH, - - MLX5_INTERFACE_PROTOCOL_IB_REP, - MLX5_INTERFACE_PROTOCOL_MPIB, - MLX5_INTERFACE_PROTOCOL_IB, - - MLX5_INTERFACE_PROTOCOL_VDPA, -}; - -struct mlx5_interface { - void * (*add)(struct mlx5_core_dev *dev); - void (*remove)(struct mlx5_core_dev *dev, void *context); - int (*attach)(struct mlx5_core_dev *dev, void *context); - void (*detach)(struct mlx5_core_dev *dev, void *context); - int protocol; - struct list_head list; -}; - -int mlx5_register_interface(struct mlx5_interface *intf); -void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); -- cgit v1.2.3 From e87114022e1de734de0552e6b4f2dc5309efa27a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 10 Oct 2020 11:57:26 +0300 Subject: net/mlx5: Simplify eswitch mode check Provide mlx5_core device instead of "priv" pointer while checking eswith mode. Reviewed-by: Roi Dayan Signed-off-by: Leon Romanovsky --- include/linux/mlx5/eswitch.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index b0ae8020f13e..29fd832950e0 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -96,10 +96,10 @@ static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); -u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); +u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) +static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) { return MLX5_ESWITCH_NONE; } @@ -136,4 +136,8 @@ mlx5_eswitch_get_vport_metadata_mask(void) } #endif /* CONFIG_MLX5_ESWITCH */ +static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) +{ + return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; +} #endif -- cgit v1.2.3 From e91d8d78237de8d7120c320b3645b7100848f24d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Sat, 5 Dec 2020 22:14:51 -0800 Subject: mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING While I was doing zram testing, I found sometimes decompression failed since the compression buffer was corrupted. With investigation, I found below commit calls cond_resched unconditionally so it could make a problem in atomic context if the task is reschedule. BUG: sleeping function called from invalid context at mm/vmalloc.c:108 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 946, name: memhog 3 locks held by memhog/946: #0: ffff9d01d4b193e8 (&mm->mmap_lock#2){++++}-{4:4}, at: __mm_populate+0x103/0x160 #1: ffffffffa3d53de0 (fs_reclaim){+.+.}-{0:0}, at: __alloc_pages_slowpath.constprop.0+0xa98/0x1160 #2: ffff9d01d56b8110 (&zspage->lock){.+.+}-{3:3}, at: zs_map_object+0x8e/0x1f0 CPU: 0 PID: 946 Comm: memhog Not tainted 5.9.3-00011-gc5bfc0287345-dirty #316 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 Call Trace: unmap_kernel_range_noflush+0x2eb/0x350 unmap_kernel_range+0x14/0x30 zs_unmap_object+0xd5/0xe0 zram_bvec_rw.isra.0+0x38c/0x8e0 zram_rw_page+0x90/0x101 bdev_write_page+0x92/0xe0 __swap_writepage+0x94/0x4a0 pageout+0xe3/0x3a0 shrink_page_list+0xb94/0xd60 shrink_inactive_list+0x158/0x460 We can fix this by removing the ZSMALLOC_PGTABLE_MAPPING feature (which contains the offending calling code) from zsmalloc. Even though this option showed some amount improvement(e.g., 30%) in some arm32 platforms, it has been headache to maintain since it have abused APIs[1](e.g., unmap_kernel_range in atomic context). Since we are approaching to deprecate 32bit machines and already made the config option available for only builtin build since v5.8, lastly it has been not default option in zsmalloc, it's time to drop the option for better maintenance. [1] http://lore.kernel.org/linux-mm/20201105170249.387069-1-minchan@kernel.org Fixes: e47110e90584 ("mm/vunmap: add cond_resched() in vunmap_pmd_range") Signed-off-by: Minchan Kim Signed-off-by: Andrew Morton Reviewed-by: Sergey Senozhatsky Cc: Tony Lindgren Cc: Christoph Hellwig Cc: Harish Sriram Cc: Uladzislau Rezki Cc: Link: https://lkml.kernel.org/r/20201117202916.GA3856507@google.com Signed-off-by: Linus Torvalds --- include/linux/zsmalloc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 0fdbf653b173..4807ca4d52e0 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -20,7 +20,6 @@ * zsmalloc mapping modes * * NOTE: These only make a difference when a mapped object spans pages. - * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected. */ enum zs_mapmode { ZS_MM_RW, /* normal read-write mapping */ -- cgit v1.2.3 From 85261c1ff156eb60fc26c378748387f2e85c6878 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Mon, 16 Nov 2020 14:56:11 +0200 Subject: mei: bus: add vtag support Add API to support vtag in communication on mei bus. Add mei_cldev_send_vtag, mei_cldev_recv_vtag and mei_cldev_recv_nonblock_vtag functions to allow sending a message with vtag set and to receive vtag of an incoming message. Cc: Sean Z Huang Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20201116125612.1660971-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 52aa4821093a..959ad7d850b4 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -95,6 +95,12 @@ ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 vtag); +ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 *vtag); +ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, + size_t length, u8 *vtag); int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, -- cgit v1.2.3 From 76437b340b242fd21952f54ba8965d21a1ffa8c8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 7 Dec 2020 10:16:01 +0100 Subject: earlycon: drop semicolon from earlycon macro Drop the trailing semicolon from the OF_EARLYCON_DECLARE() macro definition which was left when removing the array-of-pointer indirection. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201207091601.5202-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3e32b788c28d..e1b684e33841 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -372,7 +372,7 @@ extern const struct earlycon_id __earlycon_table_end[]; __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ - .setup = fn }; + .setup = fn } #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) -- cgit v1.2.3 From 2d26c716fc49f41a63e1efe8f1f772b0adeaacef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 7 Dec 2020 10:13:08 +0100 Subject: module: drop semicolon from version macro Drop the trailing semicolon from the MODULE_VERSION() macro definition which was left when removing the array-of-pointer indirection. Signed-off-by: Johan Hovold Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 5958075ea3f4..e7a619c2457e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -279,7 +279,7 @@ extern typeof(name) __mod_##type##__##name##_device_table \ }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ - }; + } #endif /* Optional firmware file (or files) needed by the module -- cgit v1.2.3 From c69942bda5152d764ee7d897d1627d64c7177ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Mon, 30 Nov 2020 16:24:15 +0100 Subject: mtd: spi-nor: Fix multiple typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few typos in comments in the SPI NOR framework; fix them. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Vignesh Raghavendra Reviewed-by: Tudor Ambarus Link: https://lore.kernel.org/r/20201130152416.1283972-1-j.neuschaefer@gmx.net --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 299685d15dc2..d13958de6d8a 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -433,7 +433,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor) * @name: the chip type name * @hwcaps: the hardware capabilities supported by the controller driver * - * The drivers can use this fuction to scan the SPI NOR. + * The drivers can use this function to scan the SPI NOR. * In the scanning, it will try to get all the necessary information to * fill the mtd_info{} and the spi_nor{}. * -- cgit v1.2.3 From 0eba770790426553f45b8643bcd77b854e045057 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 5 Nov 2020 20:27:45 +0100 Subject: clk: composite: add devm_clk_hw_register_composite_pdata() This will simplify drivers which would only unregister the clk in their remove() op. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201105192746.19564-3-michael@walle.cc Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 03a5de5f99f4..33db52ff83a0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1062,6 +1062,13 @@ struct clk_hw *clk_hw_register_composite_pdata(struct device *dev, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags); +struct clk_hw *devm_clk_hw_register_composite_pdata(struct device *dev, + const char *name, const struct clk_parent_data *parent_data, + int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags); void clk_hw_unregister_composite(struct clk_hw *hw); struct clk *clk_register(struct device *dev, struct clk_hw *hw); -- cgit v1.2.3 From d9a9280a0d0ae51dc1d4142138b99242b7ec8ac6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:10:58 +0100 Subject: seq_buf: Avoid type mismatch for seq_buf_init Building with W=2 prints a number of warnings for one function that has a pointer type mismatch: linux/seq_buf.h: In function 'seq_buf_init': linux/seq_buf.h:35:12: warning: pointer targets in assignment from 'unsigned char *' to 'char *' differ in signedness [-Wpointer-sign] Change the type in the function prototype according to the type in the structure. Link: https://lkml.kernel.org/r/20201026161108.3707783-1-arnd@kernel.org Fixes: 9a7777935c34 ("tracing: Convert seq_buf fields to be like seq_file fields") Reviewed-by: Cezary Rojewski Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- include/linux/seq_buf.h | 2 +- include/linux/trace_seq.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index fb0205d87d3c..9d6c28cc4d8f 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -30,7 +30,7 @@ static inline void seq_buf_clear(struct seq_buf *s) } static inline void -seq_buf_init(struct seq_buf *s, unsigned char *buf, unsigned int size) +seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) { s->buffer = buf; s->size = size; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6c30508fca19..5a2c650d9e1c 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -12,7 +12,7 @@ */ struct trace_seq { - unsigned char buffer[PAGE_SIZE]; + char buffer[PAGE_SIZE]; struct seq_buf seq; int full; }; @@ -51,7 +51,7 @@ static inline int trace_seq_used(struct trace_seq *s) * that is about to be written to and then return the result * of that write. */ -static inline unsigned char * +static inline char * trace_seq_buffer_ptr(struct trace_seq *s) { return s->buffer + seq_buf_used(&s->seq); -- cgit v1.2.3 From 661d4f55a79483aee4970a76e3bd9d4cdc74ac79 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 22 Nov 2020 15:35:46 +0000 Subject: sbitmap: remove swap_lock map->swap_lock protects map->cleared from concurrent modification, however sbitmap_deferred_clear() is already atomically drains it, so it's guaranteed to not loose bits on concurrent sbitmap_deferred_clear(). A one threaded tag heavy test on top of nullbk showed ~1.5% t-put increase, and 3% -> 1% cycle reduction of sbitmap_get() according to perf. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index e40d019c3d9d..74cc6384715e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -32,11 +32,6 @@ struct sbitmap_word { * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; - - /** - * @swap_lock: Held while swapping word <-> cleared - */ - spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** -- cgit v1.2.3 From 26792699fe3681102aa85f4ae6d39e80a6a7e6b6 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sun, 8 Nov 2020 19:51:09 +0100 Subject: clk: divider: add devm_clk_hw_register_divider_table() This will simplify drivers which would only unregister the clk in their remove() op. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20201108185113.31377-6-michael@walle.cc Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 33db52ff83a0..5f896df01f83 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -639,6 +639,12 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, const struct clk_parent_data *parent_data, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); +struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, + struct device_node *np, const char *name, + const char *parent_name, const struct clk_hw *parent_hw, + const struct clk_parent_data *parent_data, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, @@ -779,6 +785,27 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, (parent_data), (flags), (reg), (shift), \ (width), (clk_divider_flags), (table), \ (lock)) +/** + * devm_clk_hw_register_divider_table - register a table based divider clock + * with the clock framework (devres variant) + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @table: array of divider/value pairs ending with a div set to 0 + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_divider_table(dev, name, parent_name, flags, \ + reg, shift, width, \ + clk_divider_flags, table, lock) \ + __devm_clk_hw_register_divider((dev), NULL, (name), (parent_name), \ + NULL, NULL, (flags), (reg), (shift), \ + (width), (clk_divider_flags), (table), \ + (lock)) void clk_unregister_divider(struct clk *clk); void clk_hw_unregister_divider(struct clk_hw *hw); -- cgit v1.2.3 From 374a96b9600ccf60083c0fec8f727e04752a7f0c Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 6 Dec 2020 11:12:54 +0200 Subject: net/mlx4: Remove unused #define MAX_MSIX_P_PORT All usages of the definition MAX_MSIX_P_PORT were removed. It's not in use anymore. Remove it. Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Link: https://lore.kernel.org/r/20201206091254.12476-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx4/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 06e066e04a4b..236a7d04f891 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -46,7 +46,6 @@ #define DEFAULT_UAR_PAGE_SHIFT 12 -#define MAX_MSIX_P_PORT 17 #define MAX_MSIX 128 #define MIN_MSIX_P_PORT 5 #define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \ -- cgit v1.2.3 From fb01a2932e81a1fb2273f87ff92dc8172b8880ee Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 3 Dec 2020 09:26:36 +0800 Subject: blk-mq: add new API of blk_mq_hctx_set_fq_lock_class flush_end_io() may be called recursively from some driver, such as nvme-loop, so lockdep may complain 'possible recursive locking'. Commit b3c6a5997541("block: Fix a lockdep complaint triggered by request queue flushing") tried to address this issue by assigning dynamically allocated per-flush-queue lock class. This solution adds synchronize_rcu() for each hctx's release handler, and causes horrible SCSI MQ probe delay(more than half an hour on megaraid sas). Add new API of blk_mq_hctx_set_fq_lock_class() for these drivers, so we just need to use driver specific lock class for avoiding the lockdep warning of 'possible recursive locking'. Tested-by: Kashyap Desai Reported-by: Qian Cai Cc: Sumit Saxena Cc: John Garry Cc: Kashyap Desai Cc: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c3..5f639240760e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -594,5 +595,7 @@ static inline void blk_mq_cleanup_rq(struct request *rq) } blk_qc_t blk_mq_submit_bio(struct bio *bio); +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key); #endif -- cgit v1.2.3 From 76ea4d8eeefbfdd37e47c6fd579d0d5852457618 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 24 Nov 2020 10:43:45 +0000 Subject: firmware: arm_scmi: Add power_scale_mw_get() interface Add a new interface to the existing perf_ops and export the information about the power values scale. This would be used by the cpufreq driver and Energy Model framework to set the performance domains scale: milli-Watts or abstract scale. Suggested-by: Morten Rasmussen Reviewed-by: Cristian Marussi Signed-off-by: Lukasz Luba Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- include/linux/scmi_protocol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9cd312a1ff92..c77e4e11e788 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -121,6 +121,7 @@ struct scmi_perf_ops { unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_handle *handle, struct device *dev); + bool (*power_scale_mw_get)(const struct scmi_handle *handle); }; /** -- cgit v1.2.3 From cc00bcaa589914096edef7fb87ca5cee4a166b5c Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 25 Nov 2020 11:27:22 -0700 Subject: netfilter: x_tables: Switch synchronization to RCU When running concurrent iptables rules replacement with data, the per CPU sequence count is checked after the assignment of the new information. The sequence count is used to synchronize with the packet path without the use of any explicit locking. If there are any packets in the packet path using the table information, the sequence count is incremented to an odd value and is incremented to an even after the packet process completion. The new table value assignment is followed by a write memory barrier so every CPU should see the latest value. If the packet path has started with the old table information, the sequence counter will be odd and the iptables replacement will wait till the sequence count is even prior to freeing the old table info. However, this assumes that the new table information assignment and the memory barrier is actually executed prior to the counter check in the replacement thread. If CPU decides to execute the assignment later as there is no user of the table information prior to the sequence check, the packet path in another CPU may use the old table information. The replacement thread would then free the table information under it leading to a use after free in the packet processing context- Unable to handle kernel NULL pointer dereference at virtual address 000000000000008e pc : ip6t_do_table+0x5d0/0x89c lr : ip6t_do_table+0x5b8/0x89c ip6t_do_table+0x5d0/0x89c ip6table_filter_hook+0x24/0x30 nf_hook_slow+0x84/0x120 ip6_input+0x74/0xe0 ip6_rcv_finish+0x7c/0x128 ipv6_rcv+0xac/0xe4 __netif_receive_skb+0x84/0x17c process_backlog+0x15c/0x1b8 napi_poll+0x88/0x284 net_rx_action+0xbc/0x23c __do_softirq+0x20c/0x48c This could be fixed by forcing instruction order after the new table information assignment or by switching to RCU for the synchronization. Fixes: 80055dab5de0 ("netfilter: x_tables: make xt_replace_table wait until old rules are not used anymore") Reported-by: Sean Tranchetti Reported-by: kernel test robot Suggested-by: Florian Westphal Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5deb099d156d..8ebb64193757 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... */ - struct xt_table_info *private; + struct xt_table_info __rcu *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -448,6 +448,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table); + #ifdef CONFIG_COMPAT #include -- cgit v1.2.3 From 2f24dfb71208eeb0174f08dd56ca6d3c17b279a5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 02:34:50 +0800 Subject: iommu: Delete split_and_remove_iova() Function split_and_remove_iova() has not been referenced since commit e70b081c6f37 ("iommu/vt-d: Remove IOVA handling code from the non-dma_ops path"), so delete it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1607020492-189471-2-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- include/linux/iova.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index a0637abffee8..a77add571c50 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -160,8 +160,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); -struct iova *split_and_remove_iova(struct iova_domain *iovad, - struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #else static inline int iova_cache_get(void) @@ -258,14 +256,6 @@ static inline void put_iova_domain(struct iova_domain *iovad) { } -static inline struct iova *split_and_remove_iova(struct iova_domain *iovad, - struct iova *iova, - unsigned long pfn_lo, - unsigned long pfn_hi) -{ - return NULL; -} - static inline void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) { -- cgit v1.2.3 From 51b70b817b187e48155fc492adb9ce80bdb21b70 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 02:34:51 +0800 Subject: iommu: Stop exporting alloc_iova_mem() It is not used outside iova.c Signed-off-by: John Garry Link: https://lore.kernel.org/r/1607020492-189471-3-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- include/linux/iova.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index a77add571c50..d8c011b6d4ed 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -136,7 +136,6 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) int iova_cache_get(void); void iova_cache_put(void); -struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); @@ -171,11 +170,6 @@ static inline void iova_cache_put(void) { } -static inline struct iova *alloc_iova_mem(void) -{ - return NULL; -} - static inline void free_iova_mem(struct iova *iova) { } -- cgit v1.2.3 From 176cfc187c24287a363e7612cd2385ba40c2042b Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 02:34:52 +0800 Subject: iommu: Stop exporting free_iova_mem() It has no user outside iova.c Signed-off-by: John Garry Link: https://lore.kernel.org/r/1607020492-189471-4-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- include/linux/iova.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index d8c011b6d4ed..76e16ae20729 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -136,7 +136,6 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) int iova_cache_get(void); void iova_cache_put(void); -void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, @@ -170,10 +169,6 @@ static inline void iova_cache_put(void) { } -static inline void free_iova_mem(struct iova *iova) -{ -} - static inline void free_iova(struct iova_domain *iovad, unsigned long pfn) { } -- cgit v1.2.3 From fefe8527a1e0e0014946c6b5b3b2e40cb32bb5d3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 25 Nov 2020 17:29:39 +0000 Subject: iommu/io-pgtable: Remove tlb_flush_leaf The only user of tlb_flush_leaf is a particularly hairy corner of the Arm short-descriptor code, which wants a synchronous invalidation to minimise the races inherent in trying to split a large page mapping. This is already far enough into "here be dragons" territory that no sensible caller should ever hit it, and thus it really doesn't need optimising. Although using tlb_flush_walk there may technically be more heavyweight than needed, it does the job and saves everyone else having to carry around useless baggage. Signed-off-by: Robin Murphy Reviewed-by: Steven Price Link: https://lore.kernel.org/r/9844ab0c5cb3da8b2f89c6c2da16941910702b41.1606324115.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index fb4d5a763e0c..ea727eb1a1a9 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -25,8 +25,6 @@ enum io_pgtable_fmt { * @tlb_flush_walk: Synchronously invalidate all intermediate TLB state * (sometimes referred to as the "walk cache") for a virtual * address range. - * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual - * address range. * @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a * single page. IOMMUs that cannot batch TLB invalidation * operations efficiently will typically issue them here, but @@ -40,8 +38,6 @@ struct iommu_flush_ops { void (*tlb_flush_all)(void *cookie); void (*tlb_flush_walk)(unsigned long iova, size_t size, size_t granule, void *cookie); - void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, - void *cookie); void (*tlb_add_page)(struct iommu_iotlb_gather *gather, unsigned long iova, size_t granule, void *cookie); }; @@ -228,13 +224,6 @@ io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova, iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); } -static inline void -io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, - size_t size, size_t granule) -{ - iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie); -} - static inline void io_pgtable_tlb_add_page(struct io_pgtable *iop, struct iommu_iotlb_gather * gather, unsigned long iova, -- cgit v1.2.3 From 1080399542075bb0e9d46ea80418d76784d1ece8 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Sat, 28 Nov 2020 07:09:23 +0530 Subject: PM / EM: Micro optimization in em_cpu_energy When the sum of the utilization of CPUs in a power domain is zero, return the energy as 0 without doing any computations. Acked-by: Quentin Perret Reviewed-by: Dietmar Eggemann Signed-off-by: Pavankumar Kondeti Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 9618c0a46ef4..757fc60658fa 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -106,6 +106,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, struct em_perf_state *ps; int i, cpu; + if (!sum_util) + return 0; + /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested -- cgit v1.2.3 From b577562ccc072ab4b09243740ebeca52309eecd2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 8 Dec 2020 13:16:22 +0100 Subject: PCI: Remove unused HAVE_PCI_SET_MWI Remove unused HAVE_PCI_SET_MWI. Link: https://lore.kernel.org/r/03f20cac-708d-7897-c7c7-cb4e63cfd991@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22207a79762c..fe824afc0e91 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1190,7 +1190,6 @@ void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); int pci_set_cacheline_size(struct pci_dev *dev); -#define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); int __must_check pcim_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); -- cgit v1.2.3 From f119cc9818eb33b66e977ad3af75aef6500bbdc3 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Dec 2020 18:51:41 +0800 Subject: net: stmmac: overwrite the dma_cap.addr64 according to HW design The current IP register MAC_HW_Feature1[ADDR64] only defines 32/40/64 bit width, but some SOCs support others like i.MX8MP support 34 bits but it maps to 40 bits width in MAC_HW_Feature1[ADDR64]. So overwrite dma_cap.addr64 according to HW real design. Fixes: 94abdad6974a ("net: ethernet: dwmac: add ethernet glue logic for NXP imx8 chip") Signed-off-by: Fugang Duan Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 628e28903b8b..15ca6b4167cc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -170,6 +170,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; + u32 addr64; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; -- cgit v1.2.3 From b60da4955f53d1f50e44351a9c3a37a92503079e Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 8 Dec 2020 18:36:23 +0100 Subject: bpf: Only provide bpf_sock_from_file with CONFIG_NET This moves the bpf_sock_from_file definition into net/core/filter.c which only gets compiled with CONFIG_NET and also moves the helper proto usage next to other tracing helpers that are conditional on CONFIG_NET. This avoids ld: kernel/trace/bpf_trace.o: in function `bpf_sock_from_file': bpf_trace.c:(.text+0xe23): undefined reference to `sock_from_file' When compiling a kernel with BPF and without NET. Reported-by: kernel test robot Reported-by: Randy Dunlap Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: Randy Dunlap Acked-by: Martin KaFai Lau Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20201208173623.1136863-1-revest@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d05e75ed8c1b..07cb5d15e743 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1859,6 +1859,7 @@ extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; +extern const struct bpf_func_proto bpf_sock_from_file_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From e77dcb0b732dd355ca594909f6c2085dfc46cde2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 6 Nov 2020 10:37:16 +0530 Subject: opp: Don't create an OPP table from dev_pm_opp_get_opp_table() It has been found that some users (like cpufreq-dt and others on LKML) have abused the helper dev_pm_opp_get_opp_table() to create the OPP table instead of just finding it, which is the wrong thing to do. This routine was meant for OPP core's internal working and exposed the whole functionality by mistake. Change the scope of dev_pm_opp_get_opp_table() to only finding the table. The internal helpers _opp_get_opp_table*() are thus renamed to _add_opp_table*(), dev_pm_opp_get_opp_table_indexed() is removed (as we don't need the index field for finding the OPP table) and so the only user, genpd, is updated. Note that the prototype of _add_opp_table() was already left in opp.h by mistake when it was removed earlier and so we weren't required to add it now. Acked-by: Ulf Hansson Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index dbb484524f82..1435c054016a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -90,7 +90,6 @@ struct dev_pm_set_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); -struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); -- cgit v1.2.3 From e4a9378083c57a22624cda8b780ff5f5da4de7ef Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 1 Dec 2020 19:17:33 -0800 Subject: usb: typec: tcpm: Pass down negotiated rev to update retry count nRetryCount was updated from 3 to 2 between PD2.0 and PD3.0 spec. nRetryCount in "Table 6-34 Counter parameters" of the PD 2.0 spec is set to 3, whereas, nRetryCount in "Table 6-59 Counter parameters" is set to 2. Pass down negotiated rev in pd_transmit so that low level chip drivers can update the retry count accordingly before attempting packet transmission. This helps in passing "TEST.PD.PORT.ALL.02" of the "Power Delivery Merged" test suite which was initially failing with "The UUT did not retransmit the message nReryCount times" In fusb302 & tcpci drivers, by default the driver sets the retry count to 3 (Default for PD 2.0). Update this to 2, if the negotiated rev is PD 3.0. In wcove, since the retry count is intentionally set to max, leaving it as is. Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20201202031733.647808-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index e68aaa12886f..efaedd7e8a18 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -121,7 +121,7 @@ struct tcpc_dev { enum typec_cc_status cc); int (*try_role)(struct tcpc_dev *dev, int role); int (*pd_transmit)(struct tcpc_dev *dev, enum tcpm_transmit_type type, - const struct pd_message *msg); + const struct pd_message *msg, unsigned int negotiated_rev); int (*set_bist_data)(struct tcpc_dev *dev, bool on); int (*enable_frs)(struct tcpc_dev *dev, bool enable); void (*frs_sourcing_vbus)(struct tcpc_dev *dev); -- cgit v1.2.3 From 28b43d3d746b89fc112fe681e018b39b43495dad Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 1 Dec 2020 20:08:38 -0800 Subject: usb: typec: tcpm: Introduce vsafe0v for vbus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TCPM at present lacks the notion of VSAFE0V. There are three vbus threshold levels that are critical to track: a. vSafe5V - VBUS “5 volts” as defined by the USB PD specification. b. vSinkDisconnect - Threshold used for transition from Attached.SNK to Unattached.SNK. c. vSafe0V - VBUS “0 volts” as defined by the USB PD specification. Tracking vSafe0V is crucial for entry into Try.SNK and Attached.SRC and turning vbus back on by the source in response to hard reset. >From "4.5.2.2.8.2 Exiting from AttachWait.SRC State" section in the Type-C spec: "The port shall transition to Attached.SRC when VBUS is at vSafe0V and the SRC.Rd state is detected on exactly one of the CC1 or CC2 pins for at least tCCDebounce." "A DRP that strongly prefers the Sink role may optionally transition to Try.SNK instead of Attached.SRC when VBUS is at vSafe0V and the SRC.Rd state is detected on exactly one of the CC1 or CC2 pins for at least tCCDebounce." >From "7.1.5 Response to Hard Resets" section in the PD spec: "After establishing the vSafe0V voltage condition on VBUS, the Source Shall wait tSrcRecover before re-applying VCONN and restoring VBUS to vSafe5V." vbus_present in the TCPM code tracks vSafe5V(vbus_present is true) and vSinkDisconnect(vbus_present is false). This change adds is_vbus_vsafe0v callback which when set makes TCPM query for vSafe0V voltage level when needed. Since not all TCPC controllers might have the capability to report vSafe0V, TCPM assumes that vSafe0V is same as vSinkDisconnect when is_vbus_vsafe0v callback is not set. This allows TCPM to continue to support controllers which don't have the support for reporting vSafe0V. Introducing vSafe0V helps fix the failure reported at "Step 15. CVS verifies PUT remains in AttachWait.SRC for 500ms" of "TD 4.7.2 Try. SNK DRP Connect DRP Test" of "Universal Serial Bus Type-C (USB Type-C) Functional Test Specification Chapters 4 and 5". Here the compliance tester intentionally maintains vbus at greater than vSafe0V and expects the Product under test to stay in AttachWait.SRC till vbus drops to vSafe0V. Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20201202040840.663578-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index efaedd7e8a18..f4a18427f5c4 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -98,6 +98,12 @@ enum tcpm_transmit_type { * will be turned on. requested_vbus_voltage is set to 0 when vbus * is going to disappear knowingly i.e. during PR_SWAP and * HARD_RESET etc. + * @is_vbus_vsafe0v: + * Optional; TCPCI spec based TCPC implementations are expected to + * detect VSAFE0V voltage level at vbus. When detection of VSAFE0V + * is supported by TCPC, set this callback for TCPM to query + * whether vbus is at VSAFE0V when needed. + * Returns true when vbus is at VSAFE0V, false otherwise. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -128,6 +134,7 @@ struct tcpc_dev { int (*enable_auto_vbus_discharge)(struct tcpc_dev *dev, bool enable); int (*set_auto_vbus_discharge_threshold)(struct tcpc_dev *dev, enum typec_pwr_opmode mode, bool pps_active, u32 requested_vbus_voltage); + bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev); }; struct tcpm_port; -- cgit v1.2.3 From af633212c4aac1dbd3a48615a834646ce072346d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 2 Dec 2020 12:39:36 +0100 Subject: tty: use assign_bit() in port-flag accessors Use the new assign_bit() wrapper in the port-flag accessors instead of open coding. Suggested-by: Jiri Slaby Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201202113942.27024-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 0c2f97ba8018..9b8e7d5bf2fc 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -615,10 +615,7 @@ static inline bool tty_port_cts_enabled(struct tty_port *port) static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_CTS_FLOW, &port->iflags); - else - clear_bit(TTY_PORT_CTS_FLOW, &port->iflags); + assign_bit(TTY_PORT_CTS_FLOW, &port->iflags, val); } static inline bool tty_port_active(struct tty_port *port) @@ -628,10 +625,7 @@ static inline bool tty_port_active(struct tty_port *port) static inline void tty_port_set_active(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_ACTIVE, &port->iflags); - else - clear_bit(TTY_PORT_ACTIVE, &port->iflags); + assign_bit(TTY_PORT_ACTIVE, &port->iflags, val); } static inline bool tty_port_check_carrier(struct tty_port *port) @@ -641,10 +635,7 @@ static inline bool tty_port_check_carrier(struct tty_port *port) static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_CHECK_CD, &port->iflags); - else - clear_bit(TTY_PORT_CHECK_CD, &port->iflags); + assign_bit(TTY_PORT_CHECK_CD, &port->iflags, val); } static inline bool tty_port_suspended(struct tty_port *port) @@ -654,10 +645,7 @@ static inline bool tty_port_suspended(struct tty_port *port) static inline void tty_port_set_suspended(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_SUSPENDED, &port->iflags); - else - clear_bit(TTY_PORT_SUSPENDED, &port->iflags); + assign_bit(TTY_PORT_SUSPENDED, &port->iflags, val); } static inline bool tty_port_initialized(struct tty_port *port) @@ -667,10 +655,7 @@ static inline bool tty_port_initialized(struct tty_port *port) static inline void tty_port_set_initialized(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_INITIALIZED, &port->iflags); - else - clear_bit(TTY_PORT_INITIALIZED, &port->iflags); + assign_bit(TTY_PORT_INITIALIZED, &port->iflags, val); } static inline bool tty_port_kopened(struct tty_port *port) @@ -680,10 +665,7 @@ static inline bool tty_port_kopened(struct tty_port *port) static inline void tty_port_set_kopened(struct tty_port *port, bool val) { - if (val) - set_bit(TTY_PORT_KOPENED, &port->iflags); - else - clear_bit(TTY_PORT_KOPENED, &port->iflags); + assign_bit(TTY_PORT_KOPENED, &port->iflags, val); } extern struct tty_struct *tty_port_tty_get(struct tty_port *port); -- cgit v1.2.3 From 9e1792727ead477f49958578d0dbd466a7deea48 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 2 Dec 2020 12:39:37 +0100 Subject: tty: use const parameters in port-flag accessors Declare the port parameter to the flag-test accessors as const. This is currently mostly cosmetic as the accessors are already inlined. Suggested-by: Jiri Slaby Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20201202113942.27024-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 9b8e7d5bf2fc..c873f475f0a7 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -608,7 +608,7 @@ static inline struct tty_port *tty_port_get(struct tty_port *port) } /* If the cts flow control is enabled, return true. */ -static inline bool tty_port_cts_enabled(struct tty_port *port) +static inline bool tty_port_cts_enabled(const struct tty_port *port) { return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } @@ -618,7 +618,7 @@ static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) assign_bit(TTY_PORT_CTS_FLOW, &port->iflags, val); } -static inline bool tty_port_active(struct tty_port *port) +static inline bool tty_port_active(const struct tty_port *port) { return test_bit(TTY_PORT_ACTIVE, &port->iflags); } @@ -628,7 +628,7 @@ static inline void tty_port_set_active(struct tty_port *port, bool val) assign_bit(TTY_PORT_ACTIVE, &port->iflags, val); } -static inline bool tty_port_check_carrier(struct tty_port *port) +static inline bool tty_port_check_carrier(const struct tty_port *port) { return test_bit(TTY_PORT_CHECK_CD, &port->iflags); } @@ -638,7 +638,7 @@ static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) assign_bit(TTY_PORT_CHECK_CD, &port->iflags, val); } -static inline bool tty_port_suspended(struct tty_port *port) +static inline bool tty_port_suspended(const struct tty_port *port) { return test_bit(TTY_PORT_SUSPENDED, &port->iflags); } @@ -648,7 +648,7 @@ static inline void tty_port_set_suspended(struct tty_port *port, bool val) assign_bit(TTY_PORT_SUSPENDED, &port->iflags, val); } -static inline bool tty_port_initialized(struct tty_port *port) +static inline bool tty_port_initialized(const struct tty_port *port) { return test_bit(TTY_PORT_INITIALIZED, &port->iflags); } @@ -658,7 +658,7 @@ static inline void tty_port_set_initialized(struct tty_port *port, bool val) assign_bit(TTY_PORT_INITIALIZED, &port->iflags, val); } -static inline bool tty_port_kopened(struct tty_port *port) +static inline bool tty_port_kopened(const struct tty_port *port) { return test_bit(TTY_PORT_KOPENED, &port->iflags); } -- cgit v1.2.3 From 4b03d99794eeed27650597a886247c6427ce1055 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:16 -0500 Subject: nfsd: minor nfsd4_change_attribute cleanup Minor cleanup, no change in behavior. Also pull out a common helper that'll be useful elsewhere. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- include/linux/iversion.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 2917ef990d43..3bfebde5a1a6 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -328,6 +328,19 @@ inode_query_iversion(struct inode *inode) return cur >> I_VERSION_QUERIED_SHIFT; } +/* + * For filesystems without any sort of change attribute, the best we can + * do is fake one up from the ctime: + */ +static inline u64 time_to_chattr(struct timespec64 *t) +{ + u64 chattr = t->tv_sec; + + chattr <<= 32; + chattr += t->tv_nsec; + return chattr; +} + /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check -- cgit v1.2.3 From 1631087ba8727db03c0ab2815dc06dc25d962b80 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 30 Nov 2020 17:46:18 -0500 Subject: Revert "nfsd4: support change_attr_type attribute" This reverts commit a85857633b04d57f4524cca0a2bfaf87b2543f9f. We're still factoring ctime into our change attribute even in the IS_I_VERSION case. If someone sets the system time backwards, a client could see the change attribute go backwards. Maybe we can just say "well, don't do that", but there's some question whether that's good enough, or whether we need a better guarantee. Also, the client still isn't actually using the attribute. While we're still figuring this out, let's just stop returning this attribute. Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- include/linux/nfs4.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 9dc7eeac924f..5b4c67c91f56 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -385,13 +385,6 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; -enum change_attr_type4 { - NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, - NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, - NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, - NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, - NFS4_CHANGE_TYPE_IS_UNDEFINED = 4 -}; /* Mandatory Attributes */ #define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) @@ -459,7 +452,6 @@ enum change_attr_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) -#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) -- cgit v1.2.3 From daab110e47f8d7aa6da66923e3ac1a8dbd2b2a72 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:14 -0500 Subject: nfsd: add a new EXPORT_OP_NOWCC flag to struct export_operations With NFSv3 nfsd will always attempt to send along WCC data to the client. This generally involves saving off the in-core inode information prior to doing the operation on the given filehandle, and then issuing a vfs_getattr to it after the op. Some filesystems (particularly clustered or networked ones) have an expensive ->getattr inode operation. Atomicity is also often difficult or impossible to guarantee on such filesystems. For those, we're best off not trying to provide WCC information to the client at all, and to simply allow it to poll for that information as needed with a GETATTR RPC. This patch adds a new flags field to struct export_operations, and defines a new EXPORT_OP_NOWCC flag that filesystems can use to indicate that nfsd should not attempt to provide WCC info in NFSv3 replies. It also adds a blurb about the new flags field and flag to the exporting documentation. The server will also now skip collecting this information for NFSv2 as well, since that info is never used there anyway. Note that this patch does not add this flag to any filesystem export_operations structures. This was originally developed to allow reexporting nfs via nfsd. Other filesystems may want to consider enabling this flag too. It's hard to tell however which ones have export operations to enable export via knfsd and which ones mostly rely on them for open-by-filehandle support, so I'm leaving that up to the individual maintainers to decide. I am cc'ing the relevant lists for those filesystems that I think may want to consider adding this though. Cc: HPDD-discuss@lists.01.org Cc: ceph-devel@vger.kernel.org Cc: cluster-devel@redhat.com Cc: fuse-devel@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3ceb72b67a7a..e7de0103a32e 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,6 +213,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); +#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ + unsigned long flags; }; extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, -- cgit v1.2.3 From ba5e8187c55555519ae0b63c0fb681391bc42af9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:15 -0500 Subject: nfsd: allow filesystems to opt out of subtree checking When we start allowing NFS to be reexported, then we have some problems when it comes to subtree checking. In principle, we could allow it, but it would mean encoding parent info in the filehandles and there may not be enough space for that in a NFSv3 filehandle. To enforce this at export upcall time, we add a new export_ops flag that declares the filesystem ineligible for subtree checking. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index e7de0103a32e..2fcbab0f6b61 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -214,6 +214,7 @@ struct export_operations { int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); #define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ unsigned long flags; }; -- cgit v1.2.3 From 7f84b488f9add1d5cca3e6197c95914c7bd3c1cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Nov 2020 17:03:16 -0500 Subject: nfsd: close cached files prior to a REMOVE or RENAME that would replace target It's not uncommon for some workloads to do a bunch of I/O to a file and delete it just afterward. If knfsd has a cached open file however, then the file may still be open when the dentry is unlinked. If the underlying filesystem is nfs, then that could trigger it to do a sillyrename. On a REMOVE or RENAME scan the nfsd_file cache for open files that correspond to the inode, and proactively unhash and put their references. This should prevent any delete-on-last-close activity from occurring, solely due to knfsd's open file cache. This must be done synchronously though so we use the variants that call flush_delayed_fput. There are deadlock possibilities if you call flush_delayed_fput while holding locks, however. In the case of nfsd_rename, we don't even do the lookups of the dentries to be renamed until we've locked for rename. Once we've figured out what the target dentry is for a rename, check to see whether there are cached open files associated with it. If there are, then unwind all of the locking, close them all, and then reattempt the rename. None of this is really necessary for "typical" filesystems though. It's mostly of use for NFS, so declare a new export op flag and use that to determine whether to close the files beforehand. Signed-off-by: Jeff Layton Signed-off-by: Lance Shelton Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 2fcbab0f6b61..d829403ffd3b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,8 +213,9 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); -#define EXPORT_OP_NOWCC (0x1) /* Don't collect wcc data for NFSv3 replies */ -#define EXPORT_OP_NOSUBTREECHK (0x2) /* Subtree checking is not supported! */ +#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ +#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ +#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ unsigned long flags; }; -- cgit v1.2.3 From d045465fc6cbfa4acfb5a7d817a7c1a57a078109 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:17 -0500 Subject: exportfs: Add a function to return the raw output from fh_to_dentry() In order to allow nfsd to accept return values that are not acceptable to overlayfs and others, add a new function. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d829403ffd3b..846df3c96730 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -223,6 +223,11 @@ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int connectable); +extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, + struct fid *fid, int fh_len, + int fileid_type, + int (*acceptable)(void *, struct dentry *), + void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context); -- cgit v1.2.3 From 01cbf3853959feec40ec9b9a399e12a021cd4d81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 17:03:19 -0500 Subject: nfsd: Set PF_LOCAL_THROTTLE on local filesystems only Don't set PF_LOCAL_THROTTLE on remote filesystems like NFS, since they aren't expected to ever be subject to double buffering. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 846df3c96730..d93e8a6737bb 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -216,6 +216,7 @@ struct export_operations { #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ +#define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ unsigned long flags; }; -- cgit v1.2.3 From 716a8bc7f706eeef80ab42c99d9f210eda845c81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Nov 2020 23:14:27 -0500 Subject: nfsd: Record NFSv4 pre/post-op attributes as non-atomic For the case of NFSv4, specify to the client that the pre/post-op attributes were not recorded atomically with the main operation. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/exportfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d93e8a6737bb..9f4d4bcbf251 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -217,6 +217,9 @@ struct export_operations { #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ #define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ +#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply + atomic attribute updates + */ unsigned long flags; }; -- cgit v1.2.3 From 0f9368b5bf6db0c04afc5454b1be79022a681615 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2020 14:10:32 -0600 Subject: rwsem: Implement down_read_killable_nested In preparation for converting exec_update_mutex to a rwsem so that multiple readers can execute in parallel and not deadlock, add down_read_killable_nested. This is needed so that kcmp_lock can be converted from working on a mutexes to working on rw_semaphores. Signed-off-by: Eric W. Biederman Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/87o8jabqh3.fsf@x220.int.ebiederm.org --- include/linux/rwsem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 25e3fde85617..13021b08b2ed 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -171,6 +171,7 @@ extern void downgrade_write(struct rw_semaphore *sem); * See Documentation/locking/lockdep-design.rst for more details.) */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); +extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); @@ -191,6 +192,7 @@ extern void down_read_non_owner(struct rw_semaphore *sem); extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) +# define down_read_killable_nested(sem, subclass) down_read_killable(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) # define down_write_killable_nested(sem, subclass) down_write_killable(sem) -- cgit v1.2.3 From 31784cff7ee073b34d6eddabb95e3be2880a425c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2020 14:11:13 -0600 Subject: rwsem: Implement down_read_interruptible In preparation for converting exec_update_mutex to a rwsem so that multiple readers can execute in parallel and not deadlock, add down_read_interruptible. This is needed for perf_event_open to be converted (with no semantic changes) from working on a mutex to wroking on a rwsem. Signed-off-by: Eric W. Biederman Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/87k0tybqfy.fsf@x220.int.ebiederm.org --- include/linux/rwsem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 13021b08b2ed..4c715be48717 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -123,6 +123,7 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) * lock for reading */ extern void down_read(struct rw_semaphore *sem); +extern int __must_check down_read_interruptible(struct rw_semaphore *sem); extern int __must_check down_read_killable(struct rw_semaphore *sem); /* -- cgit v1.2.3 From 66bcfcdf89d00f2409f4b5da0f8c20c08318dc72 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sun, 6 Dec 2020 17:21:42 +0100 Subject: seqlock: Prefix internal seqcount_t-only macros with a "do_" When the seqcount_LOCKNAME_t group of data types were introduced, two classes of seqlock.h sequence counter macros were added: - An external public API which can either take a plain seqcount_t or any of the seqcount_LOCKNAME_t variants. - An internal API which takes only a plain seqcount_t. To distinguish between the two groups, the "*_seqcount_t_*" pattern was used for the latter. This confused a number of mm/ call-site developers, and Linus also commented that it was not a standard practice for marking seqlock.h internal APIs. Distinguish the latter group of macros by prefixing a "do_". Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com --- include/linux/seqlock.h | 66 ++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index d89134c74fba..235cbc65fd71 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -425,9 +425,9 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - __read_seqcount_t_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_ptr(s), start) -static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) +static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { kcsan_atomic_next(0); return unlikely(READ_ONCE(s->sequence) != start); @@ -445,12 +445,12 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - read_seqcount_t_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_ptr(s), start) -static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) +static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { smp_rmb(); - return __read_seqcount_t_retry(s, start); + return do___read_seqcount_retry(s, start); } /** @@ -462,10 +462,10 @@ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - raw_write_seqcount_t_begin(seqprop_ptr(s)); \ + do_raw_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) -static inline void raw_write_seqcount_t_begin(seqcount_t *s) +static inline void do_raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -478,13 +478,13 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) */ #define raw_write_seqcount_end(s) \ do { \ - raw_write_seqcount_t_end(seqprop_ptr(s)); \ + do_raw_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) -static inline void raw_write_seqcount_t_end(seqcount_t *s) +static inline void do_raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; @@ -506,12 +506,12 @@ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin_nested(seqprop_ptr(s), subclass); \ + do_write_seqcount_begin_nested(seqprop_ptr(s), subclass); \ } while (0) -static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) +static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { - raw_write_seqcount_t_begin(s); + do_raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } @@ -533,12 +533,12 @@ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin(seqprop_ptr(s)); \ + do_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) -static inline void write_seqcount_t_begin(seqcount_t *s) +static inline void do_write_seqcount_begin(seqcount_t *s) { - write_seqcount_t_begin_nested(s, 0); + do_write_seqcount_begin_nested(s, 0); } /** @@ -549,16 +549,16 @@ static inline void write_seqcount_t_begin(seqcount_t *s) */ #define write_seqcount_end(s) \ do { \ - write_seqcount_t_end(seqprop_ptr(s)); \ + do_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) -static inline void write_seqcount_t_end(seqcount_t *s) +static inline void do_write_seqcount_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_t_end(s); + do_raw_write_seqcount_end(s); } /** @@ -603,9 +603,9 @@ static inline void write_seqcount_t_end(seqcount_t *s) * } */ #define raw_write_seqcount_barrier(s) \ - raw_write_seqcount_t_barrier(seqprop_ptr(s)) + do_raw_write_seqcount_barrier(seqprop_ptr(s)) -static inline void raw_write_seqcount_t_barrier(seqcount_t *s) +static inline void do_raw_write_seqcount_barrier(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -623,9 +623,9 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ - write_seqcount_t_invalidate(seqprop_ptr(s)) + do_write_seqcount_invalidate(seqprop_ptr(s)) -static inline void write_seqcount_t_invalidate(seqcount_t *s) +static inline void do_write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); kcsan_nestable_atomic_begin(); @@ -865,9 +865,9 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use write_seqcount_*t*_begin() - * instead of the generic write_seqcount_begin(). This way, no redundant - * lockdep_assert_held() checks are added. + * For all seqlock_t write side functions, use the the internal + * do_write_seqcount_begin() instead of generic write_seqcount_begin(). + * This way, no redundant lockdep_assert_held() checks are added. */ /** @@ -886,7 +886,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); } /** @@ -898,7 +898,7 @@ static inline void write_seqlock(seqlock_t *sl) */ static inline void write_sequnlock(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock(&sl->lock); } @@ -912,7 +912,7 @@ static inline void write_sequnlock(seqlock_t *sl) static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); } /** @@ -925,7 +925,7 @@ static inline void write_seqlock_bh(seqlock_t *sl) */ static inline void write_sequnlock_bh(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_bh(&sl->lock); } @@ -939,7 +939,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); } /** @@ -951,7 +951,7 @@ static inline void write_seqlock_irq(seqlock_t *sl) */ static inline void write_sequnlock_irq(seqlock_t *sl) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irq(&sl->lock); } @@ -960,7 +960,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_t_begin(&sl->seqcount.seqcount); + do_write_seqcount_begin(&sl->seqcount.seqcount); return flags; } @@ -989,7 +989,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { - write_seqcount_t_end(&sl->seqcount.seqcount); + do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irqrestore(&sl->lock, flags); } -- cgit v1.2.3 From cb262935a166bdef0ccfe6e2adffa00c0f2d038a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sun, 6 Dec 2020 17:21:43 +0100 Subject: seqlock: kernel-doc: Specify when preemption is automatically altered The kernel-doc annotations for sequence counters write side functions are incomplete: they do not specify when preemption is automatically disabled and re-enabled. This has confused a number of call-site developers. Fix it. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/CAHk-=wikhGExmprXgaW+MVXG1zsGpztBbVwOb23vetk41EtTBQ@mail.gmail.com --- include/linux/seqlock.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 235cbc65fd71..2f7bb92b4c9e 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -456,6 +456,8 @@ static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * + * Context: check write_seqcount_begin() */ #define raw_write_seqcount_begin(s) \ do { \ @@ -475,6 +477,8 @@ static inline void do_raw_write_seqcount_begin(seqcount_t *s) /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * + * Context: check write_seqcount_end() */ #define raw_write_seqcount_end(s) \ do { \ @@ -498,6 +502,7 @@ static inline void do_raw_write_seqcount_end(seqcount_t *s) * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst + * Context: check write_seqcount_begin() */ #define write_seqcount_begin_nested(s, subclass) \ do { \ @@ -519,11 +524,10 @@ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) * write_seqcount_begin() - start a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * - * write_seqcount_begin opens a write side critical section of the given - * seqcount_t. - * - * Context: seqcount_t write side critical sections must be serialized and - * non-preemptible. If readers can be invoked from hardirq or softirq + * Context: sequence counter write side sections must be serialized and + * non-preemptible. Preemption will be automatically disabled if and + * only if the seqcount write serialization lock is associated, and + * preemptible. If readers can be invoked from hardirq or softirq * context, interrupts or bottom halves must be respectively disabled. */ #define write_seqcount_begin(s) \ @@ -545,7 +549,8 @@ static inline void do_write_seqcount_begin(seqcount_t *s) * write_seqcount_end() - end a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * - * The write section must've been opened with write_seqcount_begin(). + * Context: Preemption will be automatically re-enabled if and only if + * the seqcount write serialization lock is associated, and preemptible. */ #define write_seqcount_end(s) \ do { \ -- cgit v1.2.3 From 0854bcdcdec26aecdc92c303816f349ee1fba2bc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:45 -0800 Subject: scsi: block: Introduce BLK_MQ_REQ_PM Introduce the BLK_MQ_REQ_PM flag. This flag makes the request allocation functions set RQF_PM. This is the first step towards removing BLK_MQ_REQ_PREEMPT. Link: https://lore.kernel.org/r/20201209052951.16136-3-bvanassche@acm.org Cc: Alan Stern Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Can Guo Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b23eeca4d677..c00e856c6fb1 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -444,6 +444,8 @@ enum { BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), + /* set RQF_PM */ + BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), /* set RQF_PREEMPT */ BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; -- cgit v1.2.3 From a4d34da715e3cb7e0741fe603dcd511bed067e00 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:50 -0800 Subject: scsi: block: Remove RQF_PREEMPT and BLK_MQ_REQ_PREEMPT Remove flag RQF_PREEMPT and BLK_MQ_REQ_PREEMPT since these are no longer used by any kernel code. Link: https://lore.kernel.org/r/20201209052951.16136-8-bvanassche@acm.org Cc: Can Guo Cc: Stanley Chu Cc: Alan Stern Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Martin Kepplinger Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/linux/blk-mq.h | 2 -- include/linux/blkdev.h | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c00e856c6fb1..88af1df94308 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -446,8 +446,6 @@ enum { BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), /* set RQF_PM */ BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), - /* set RQF_PREEMPT */ - BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..7d4b746f7e6a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,9 +79,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) -/* set for "ide_preempt" requests and also for requests for which the SCSI - "quiesce" state must be ignored. */ -#define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ @@ -430,8 +427,7 @@ struct request_queue { unsigned long queue_flags; /* * Number of contexts that have called blk_set_pm_only(). If this - * counter is above zero then only RQF_PM and RQF_PREEMPT requests are - * processed. + * counter is above zero then only RQF_PM requests are processed. */ atomic_t pm_only; -- cgit v1.2.3 From 52abca64fd9410ea6c9a3a74eab25663b403d7da Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 8 Dec 2020 21:29:51 -0800 Subject: scsi: block: Do not accept any requests while suspended blk_queue_enter() accepts BLK_MQ_REQ_PM requests independent of the runtime power management state. Now that SCSI domain validation no longer depends on this behavior, modify the behavior of blk_queue_enter() as follows: - Do not accept any requests while suspended. - Only process power management requests while suspending or resuming. Submitting BLK_MQ_REQ_PM requests to a device that is runtime suspended causes runtime-suspended devices not to resume as they should. The request which should cause a runtime resume instead gets issued directly, without resuming the device first. Of course the device can't handle it properly, the I/O fails, and the device remains suspended. The problem is fixed by checking that the queue's runtime-PM status isn't RPM_SUSPENDED before allowing a request to be issued, and queuing a runtime-resume request if it is. In particular, the inline blk_pm_request_resume() routine is renamed blk_pm_resume_queue() and the code is unified by merging the surrounding checks into the routine. If the queue isn't set up for runtime PM, or there currently is no restriction on allowed requests, the request is allowed. Likewise if the BLK_MQ_REQ_PM flag is set and the status isn't RPM_SUSPENDED. Otherwise a runtime resume is queued and the request is blocked until conditions are more suitable. [ bvanassche: modified commit message and removed Cc: stable because without the previous patches from this series this patch would break parallel SCSI domain validation + introduced queue_rpm_status() ] Link: https://lore.kernel.org/r/20201209052951.16136-9-bvanassche@acm.org Cc: Jens Axboe Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Reported-and-tested-by: Martin Kepplinger Reviewed-by: Hannes Reinecke Reviewed-by: Can Guo Signed-off-by: Alan Stern Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/linux/blkdev.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d4b746f7e6a..2b6fc3fb3a99 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -692,6 +692,18 @@ static inline bool queue_is_mq(struct request_queue *q) return q->mq_ops; } +#ifdef CONFIG_PM +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return q->rpm_status; +} +#else +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return RPM_ACTIVE; +} +#endif + static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { -- cgit v1.2.3 From c95d64012ad7de2747923b0caf80e195e940606c Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:16 -0800 Subject: Revert "driver core: Avoid deferred probe due to fw_devlink_pause/resume()" This reverts commit 2451e746478a6a6e981cfa66b62b791ca93b90c8. fw_devlink_pause/resume() was an incomplete attempt at boot time optimization. That's going to get replaced by a much better optimization at the end of the series. Since fw_devlink_pause/resume() is going away, changes made for that can also go away. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 5ed101be7b2e..da00f8e449bb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -352,8 +352,7 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_hook: Hook to global list of devices that have deferred sync_state or - * deferred fw_devlink. + * @defer_hook: Hook to global list of devices that have deferred sync_state. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. -- cgit v1.2.3 From 3b052a3e30f2eb92dcae9fd89af48d5a13045737 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:17 -0800 Subject: Revert "driver core: Rename dev_links_info.defer_sync to defer_hook" This reverts commit ec7bd78498f29680f536451fbdf9464e851273ed. This field rename was done to reuse defer_syc list head for multiple lists. That's not needed anymore and this list head will only be used for defer sync. So revert this patch to avoid conflicts with the other reverts coming after this. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index da00f8e449bb..1e771ea4dca6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -352,7 +352,7 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_hook: Hook to global list of devices that have deferred sync_state. + * @defer_sync: Hook to global list of devices that have deferred sync_state. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. @@ -361,7 +361,7 @@ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head needs_suppliers; - struct list_head defer_hook; + struct list_head defer_sync; bool need_for_probe; enum dl_dev_state status; }; -- cgit v1.2.3 From c84b90909e475a2eb4934b4d92fdd10e73e75805 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:21 -0800 Subject: Revert "driver core: fw_devlink: Add support for batching fwnode parsing" This reverts commit 716a7a25969003d82ab738179c3f1068a120ed11. The fw_devlink_pause/resume() APIs added by the commit being reverted were a first cut attempt at optimizing boot time. But these APIs don't fully solve the problem and are very fragile (can only be used for the top level devices being added). This series replaces them with a much better optimization that works for all device additions and also has the benefit of reducing the complexity of the firmware (DT, EFI) specific code and abstracting out common code to driver core. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-7-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9506f8ec0974..e0abafbb17f8 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -171,7 +171,5 @@ struct fwnode_operations { #define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) extern u32 fw_devlink_get_flags(void); -void fw_devlink_pause(void); -void fw_devlink_resume(void); #endif -- cgit v1.2.3 From 01bb86b380a306bd937c96da36f66429f3362137 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:22 -0800 Subject: driver core: Add fwnode_init() There are multiple locations in the kernel where a struct fwnode_handle is initialized. Add fwnode_init() so that we have one way of initializing a fwnode_handle. Acked-by: Rob Herring Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-8-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 6 ++++++ include/linux/of.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index e0abafbb17f8..5589799708b5 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -170,6 +170,12 @@ struct fwnode_operations { } while (false) #define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) +static inline void fwnode_init(struct fwnode_handle *fwnode, + const struct fwnode_operations *ops) +{ + fwnode->ops = ops; +} + extern u32 fw_devlink_get_flags(void); #endif diff --git a/include/linux/of.h b/include/linux/of.h index af655d264f10..df71a3a1bb8d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -108,7 +108,7 @@ static inline void of_node_init(struct device_node *node) #if defined(CONFIG_OF_KOBJ) kobject_init(&node->kobj, &of_node_ktype); #endif - node->fwnode.ops = &of_fwnode_ops; + fwnode_init(&node->fwnode, &of_fwnode_ops); } #if defined(CONFIG_OF_KOBJ) -- cgit v1.2.3 From 7b337cb3ebde384cba7405b61dfb84200bf623bf Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:23 -0800 Subject: driver core: Add fwnode link support Add support for creating supplier-consumer links between fwnodes. It is intended for internal use the driver core and generic firmware support code (eg. Device Tree, ACPI), so it is simple by design and the API provided is limited. Acked-by: Rob Herring Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-9-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 5589799708b5..b88365187347 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -10,6 +10,7 @@ #define _LINUX_FWNODE_H_ #include +#include struct fwnode_operations; struct device; @@ -18,6 +19,15 @@ struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; struct device *dev; + struct list_head suppliers; + struct list_head consumers; +}; + +struct fwnode_link { + struct fwnode_handle *supplier; + struct list_head s_hook; + struct fwnode_handle *consumer; + struct list_head c_hook; }; /** @@ -174,8 +184,12 @@ static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) { fwnode->ops = ops; + INIT_LIST_HEAD(&fwnode->consumers); + INIT_LIST_HEAD(&fwnode->suppliers); } extern u32 fw_devlink_get_flags(void); +int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); +void fwnode_links_purge(struct fwnode_handle *fwnode); #endif -- cgit v1.2.3 From b5d3e2fbcb10957521af14c4256cd0e5f68b9234 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:25 -0800 Subject: device property: Add fwnode_is_ancestor_of() and fwnode_get_next_parent_dev() Add fwnode_is_ancestor_of() helper function to check if a fwnode is an ancestor of another fwnode. Add fwnode_get_next_parent_dev() helper function that take as input a fwnode and finds the closest ancestor fwnode that has a corresponding struct device and returns that struct device. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-11-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 2d4542629d80..0a9001fe7aea 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -85,9 +85,12 @@ const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent( struct fwnode_handle *fwnode); +struct device *fwnode_get_next_parent_dev(struct fwnode_handle *fwnode); unsigned int fwnode_count_parents(const struct fwnode_handle *fwn); struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwn, unsigned int depth); +bool fwnode_is_ancestor_of(struct fwnode_handle *test_ancestor, + struct fwnode_handle *test_child); struct fwnode_handle *fwnode_get_next_child_node( const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_available_child_node( -- cgit v1.2.3 From 04f63c213b671d89db35f4239f57fa1edeb736a8 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:26 -0800 Subject: driver core: Redefine the meaning of fwnode_operations.add_links() Change the meaning of fwnode_operations.add_links() to just create fwnode links by parsing the properties of a given fwnode. This patch doesn't actually make any code changes. To keeps things more digestable, the actual functional changes come in later patches in this series. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-12-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 42 +++--------------------------------------- 1 file changed, 3 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index b88365187347..942a6bb18201 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -78,44 +78,8 @@ struct fwnode_reference_args { * endpoint node. * @graph_get_port_parent: Return the parent node of a port node. * @graph_parse_endpoint: Parse endpoint for port and endpoint id. - * @add_links: Called after the device corresponding to the fwnode is added - * using device_add(). The function is expected to create device - * links to all the suppliers of the device that are available at - * the time this function is called. The function must NOT stop - * at the first failed device link if other unlinked supplier - * devices are present in the system. This is necessary for the - * driver/bus sync_state() callbacks to work correctly. - * - * For example, say Device-C depends on suppliers Device-S1 and - * Device-S2 and the dependency is listed in that order in the - * firmware. Say, S1 gets populated from the firmware after - * late_initcall_sync(). Say S2 is populated and probed way - * before that in device_initcall(). When C is populated, if this - * add_links() function doesn't continue past a "failed linking to - * S1" and continue linking C to S2, then S2 will get a - * sync_state() callback before C is probed. This is because from - * the perspective of S2, C was never a consumer when its - * sync_state() evaluation is done. To avoid this, the add_links() - * function has to go through all available suppliers of the - * device (that corresponds to this fwnode) and link to them - * before returning. - * - * If some suppliers are not yet available (indicated by an error - * return value), this function will be called again when other - * devices are added to allow creating device links to any newly - * available suppliers. - * - * Return 0 if device links have been successfully created to all - * the known suppliers of this device or if the supplier - * information is not known. - * - * Return -ENODEV if the suppliers needed for probing this device - * have not been registered yet (because device links can only be - * created to devices registered with the driver core). - * - * Return -EAGAIN if some of the suppliers of this device have not - * been registered yet, but none of those suppliers are necessary - * for probing the device. + * @add_links: Create fwnode links to all the suppliers of the fwnode. Return + * zero on success, a negative error code otherwise. */ struct fwnode_operations { struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); @@ -155,7 +119,7 @@ struct fwnode_operations { (*graph_get_port_parent)(struct fwnode_handle *fwnode); int (*graph_parse_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); - int (*add_links)(const struct fwnode_handle *fwnode, + int (*add_links)(struct fwnode_handle *fwnode, struct device *dev); }; -- cgit v1.2.3 From c2c724c868c42c5166bf7aa644dd0a0c8d30b47a Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:27 -0800 Subject: driver core: Add fw_devlink_parse_fwtree() This function is a wrapper around fwnode_operations.add_links(). This function parses each node in a fwnode tree and create fwnode links for each of those nodes. The information for creating the fwnode links (the supplier and consumer fwnode) is obtained by parsing the properties in each of the fwnodes. This function also ensures that no fwnode is parsed more than once by marking the fwnodes as parsed. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-13-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 942a6bb18201..ffa9129182a6 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -15,12 +15,20 @@ struct fwnode_operations; struct device; +/* + * fwnode link flags + * + * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. + */ +#define FWNODE_FLAG_LINKS_ADDED BIT(0) + struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; struct device *dev; struct list_head suppliers; struct list_head consumers; + u8 flags; }; struct fwnode_link { -- cgit v1.2.3 From f9aa460672c9c56896cdc12a521159e3e67000ba Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:31 -0800 Subject: driver core: Refactor fw_devlink feature The current implementation of fw_devlink is very inefficient because it tries to get away without creating fwnode links in the name of saving memory usage. Past attempts to optimize runtime at the cost of memory usage were blocked with request for data showing that the optimization made significant improvement for real world scenarios. We have those scenarios now. There have been several reports of boot time increase in the order of seconds in this thread [1]. Several OEMs and SoC manufacturers have also privately reported significant (350-400ms) increase in boot time due to all the parsing done by fw_devlink. So this patch uses all the setup done by the previous patches in this series to refactor fw_devlink to be more efficient. Most of the code has been moved out of firmware specific (DT mostly) code into driver core. This brings the following benefits: - Instead of parsing the device tree multiple times during bootup, fw_devlink parses each fwnode node/property only once and creates fwnode links. The rest of the fw_devlink code then just looks at these fwnode links to do rest of the work. - Makes it much easier to debug probe issue due to fw_devlink in the future. fw_devlink=on blocks the probing of devices if they depend on a device that hasn't been added yet. With this refactor, it'll be very easy to tell what that device is because we now have a reference to the fwnode of the device. - Much easier to add fw_devlink support to ACPI and other firmware types. A refactor to move the common bits from DT specific code to driver core was in my TODO list as a prerequisite to adding ACPI support to fw_devlink. This series gets that done. [1] - https://lore.kernel.org/linux-omap/ea02f57e-871d-cd16-4418-c1da4bbc4696@ti.com/ Tested-by: Laurent Pinchart Tested-by: Grygorii Strashko Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-17-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 1e771ea4dca6..89bb8b84173e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -351,18 +351,13 @@ enum dl_dev_state { * struct dev_links_info - Device data related to device links. * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. - * @needs_suppliers: Hook to global list of devices waiting for suppliers. * @defer_sync: Hook to global list of devices that have deferred sync_state. - * @need_for_probe: If needs_suppliers is on a list, this indicates if the - * suppliers are needed for probe or not. * @status: Driver status information. */ struct dev_links_info { struct list_head suppliers; struct list_head consumers; - struct list_head needs_suppliers; struct list_head defer_sync; - bool need_for_probe; enum dl_dev_state status; }; -- cgit v1.2.3 From 2d09e6eb4a6f20273959f4905ccf009da8c64c7a Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 20 Nov 2020 18:02:32 -0800 Subject: driver core: Delete pointless parameter in fwnode_operations.add_links The struct device input to add_links() is not used for anything. So delete it. Acked-by: Rob Herring Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201121020232.908850-18-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ffa9129182a6..fde4ad97564c 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -127,8 +127,7 @@ struct fwnode_operations { (*graph_get_port_parent)(struct fwnode_handle *fwnode); int (*graph_parse_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); - int (*add_links)(struct fwnode_handle *fwnode, - struct device *dev); + int (*add_links)(struct fwnode_handle *fwnode); }; #define fwnode_has_op(fwnode, op) \ -- cgit v1.2.3 From 30b79eb1f92ed5974885d374a4107c94e2dd3e03 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Mon, 9 Nov 2020 14:48:17 +0100 Subject: soc: xilinx: vcu: use vcu-settings syscon registers Switch the "logicoreip" registers to the new xlnx,vcu-settings binding to be able to read the settings if the settings are specified in a separate device tree node that is shared with other drivers. If the driver is not able to find a node with the new binding, fall back to check for the logicore register bank to be backwards compatible. Signed-off-by: Michael Tretter Reviewed-by: Hyun Kwon Link: https://lore.kernel.org/r/20201109134818.4159342-4-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- include/linux/mfd/syscon/xlnx-vcu.h | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/linux/mfd/syscon/xlnx-vcu.h (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/xlnx-vcu.h b/include/linux/mfd/syscon/xlnx-vcu.h new file mode 100644 index 000000000000..d3edec4b7b1d --- /dev/null +++ b/include/linux/mfd/syscon/xlnx-vcu.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Pengutronix, Michael Tretter + */ + +#ifndef __XLNX_VCU_H +#define __XLNX_VCU_H + +#define VCU_ECODER_ENABLE 0x00 +#define VCU_DECODER_ENABLE 0x04 +#define VCU_MEMORY_DEPTH 0x08 +#define VCU_ENC_COLOR_DEPTH 0x0c +#define VCU_ENC_VERTICAL_RANGE 0x10 +#define VCU_ENC_FRAME_SIZE_X 0x14 +#define VCU_ENC_FRAME_SIZE_Y 0x18 +#define VCU_ENC_COLOR_FORMAT 0x1c +#define VCU_ENC_FPS 0x20 +#define VCU_MCU_CLK 0x24 +#define VCU_CORE_CLK 0x28 +#define VCU_PLL_BYPASS 0x2c +#define VCU_ENC_CLK 0x30 +#define VCU_PLL_CLK 0x34 +#define VCU_ENC_VIDEO_STANDARD 0x38 +#define VCU_STATUS 0x3c +#define VCU_AXI_ENC_CLK 0x40 +#define VCU_AXI_DEC_CLK 0x44 +#define VCU_AXI_MCU_CLK 0x48 +#define VCU_DEC_VIDEO_STANDARD 0x4c +#define VCU_DEC_FRAME_SIZE_X 0x50 +#define VCU_DEC_FRAME_SIZE_Y 0x54 +#define VCU_DEC_FPS 0x58 +#define VCU_BUFFER_B_FRAME 0x5c +#define VCU_WPP_EN 0x60 +#define VCU_PLL_CLK_DEC 0x64 +#define VCU_GASKET_INIT 0x74 +#define VCU_GASKET_VALUE 0x03 + +#endif /* __XLNX_VCU_H */ -- cgit v1.2.3 From 7b1c9b8441aa94a549a90fa3d42687ccbad3eade Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Mon, 9 Nov 2020 14:48:18 +0100 Subject: soc: xilinx: vcu: add missing register NUM_CORE The H.264/H.265 Video Codec Unit v1.2 documentation describes this register as follows: Number of encoders core used for the provided configuration This is required for configuring the VCU encoder buffer. Signed-off-by: Michael Tretter Reviewed-by: Hyun Kwon Link: https://lore.kernel.org/r/20201109134818.4159342-5-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- include/linux/mfd/syscon/xlnx-vcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/xlnx-vcu.h b/include/linux/mfd/syscon/xlnx-vcu.h index d3edec4b7b1d..ff7bc3656f6e 100644 --- a/include/linux/mfd/syscon/xlnx-vcu.h +++ b/include/linux/mfd/syscon/xlnx-vcu.h @@ -32,6 +32,7 @@ #define VCU_BUFFER_B_FRAME 0x5c #define VCU_WPP_EN 0x60 #define VCU_PLL_CLK_DEC 0x64 +#define VCU_NUM_CORE 0x6c #define VCU_GASKET_INIT 0x74 #define VCU_GASKET_VALUE 0x03 -- cgit v1.2.3 From 463edf5a59fd8f0fe0135101d67bfca81d1e3771 Mon Sep 17 00:00:00 2001 From: Wendy Liang Date: Tue, 24 Nov 2020 00:18:18 -0800 Subject: firmware: xlnx-zynqmp: fix compilation warning Fix compilation warning when ZYNQMP_FIRMWARE is not defined. include/linux/firmware/xlnx-zynqmp.h: In function 'zynqmp_pm_get_eemi_ops': include/linux/firmware/xlnx-zynqmp.h:363:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration] 363 | return ERR_PTR(-ENODEV); include/linux/firmware/xlnx-zynqmp.h:363:18: note: each undeclared identifier is reported only once for each function it appears in include/linux/firmware/xlnx-zynqmp.h: In function 'zynqmp_pm_get_api_version': include/linux/firmware/xlnx-zynqmp.h:367:10: error: 'ENODEV' undeclared (first use in this function) 367 | return -ENODEV; | ^~~~~~ Signed-off-by: Wendy Liang Link: https://lore.kernel.org/r/1606205898-12642-1-git-send-email-wendy.liang@xilinx.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5968df82b991..f84244ea633b 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -13,6 +13,8 @@ #ifndef __FIRMWARE_ZYNQMP_H__ #define __FIRMWARE_ZYNQMP_H__ +#include + #define ZYNQMP_PM_VERSION_MAJOR 1 #define ZYNQMP_PM_VERSION_MINOR 0 -- cgit v1.2.3 From 1f6a11a01059f9c65f8461987cc0bab4c0b58338 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 2 Dec 2020 08:38:48 +0100 Subject: firmware: xilinx: Remove additional newline This additional newline is useless and also reported by checkpatch --strict. Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/d927f3f2c97910958dd77a22828cd0bf8d89c9de.1606894725.git.michal.simek@xilinx.com --- include/linux/firmware/xlnx-zynqmp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index f84244ea633b..0db9005782d6 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -316,7 +316,6 @@ struct zynqmp_pm_query_data { u32 arg3; }; - int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 *ret_payload); -- cgit v1.2.3 From a80cefec2c2783166727324bde724c39aa8a12df Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 2 Dec 2020 08:38:49 +0100 Subject: firmware: xilinx: Add a blank line after function declaration Fix all these issues which are also reported by checkpatch --strict. Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/7b6007e05f6c01214861a37f198cd5bee62a4d3e.1606894725.git.michal.simek@xilinx.com --- include/linux/firmware/xlnx-zynqmp.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 0db9005782d6..0e7e72650ed3 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -363,107 +363,132 @@ static inline struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void) { return ERR_PTR(-ENODEV); } + static inline int zynqmp_pm_get_api_version(u32 *version) { return -ENODEV; } + static inline int zynqmp_pm_get_chipid(u32 *idcode, u32 *version) { return -ENODEV; } + static inline int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out) { return -ENODEV; } + static inline int zynqmp_pm_clock_enable(u32 clock_id) { return -ENODEV; } + static inline int zynqmp_pm_clock_disable(u32 clock_id) { return -ENODEV; } + static inline int zynqmp_pm_clock_getstate(u32 clock_id, u32 *state) { return -ENODEV; } + static inline int zynqmp_pm_clock_setdivider(u32 clock_id, u32 divider) { return -ENODEV; } + static inline int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider) { return -ENODEV; } + static inline int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate) { return -ENODEV; } + static inline int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate) { return -ENODEV; } + static inline int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id) { return -ENODEV; } + static inline int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id) { return -ENODEV; } + static inline int zynqmp_pm_set_pll_frac_mode(u32 clk_id, u32 mode) { return -ENODEV; } + static inline int zynqmp_pm_get_pll_frac_mode(u32 clk_id, u32 *mode) { return -ENODEV; } + static inline int zynqmp_pm_set_pll_frac_data(u32 clk_id, u32 data) { return -ENODEV; } + static inline int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data) { return -ENODEV; } + static inline int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value) { return -ENODEV; } + static inline int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type) { return -ENODEV; } + static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, const enum zynqmp_pm_reset_action assert_flag) { return -ENODEV; } + static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status) { return -ENODEV; } + static inline int zynqmp_pm_init_finalize(void) { return -ENODEV; } + static inline int zynqmp_pm_set_suspend_mode(u32 mode) { return -ENODEV; } + static inline int zynqmp_pm_request_node(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack) { return -ENODEV; } + static inline int zynqmp_pm_release_node(const u32 node) { return -ENODEV; } + static inline int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, @@ -471,39 +496,48 @@ static inline int zynqmp_pm_set_requirement(const u32 node, { return -ENODEV; } + static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) { return -ENODEV; } + static inline int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags) { return -ENODEV; } + static inline int zynqmp_pm_fpga_get_status(u32 *value) { return -ENODEV; } + static inline int zynqmp_pm_write_ggs(u32 index, u32 value) { return -ENODEV; } + static inline int zynqmp_pm_read_ggs(u32 index, u32 *value) { return -ENODEV; } + static inline int zynqmp_pm_write_pggs(u32 index, u32 value) { return -ENODEV; } + static inline int zynqmp_pm_read_pggs(u32 index, u32 *value) { return -ENODEV; } + static inline int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype) { return -ENODEV; } + static inline int zynqmp_pm_set_boot_health_status(u32 value) { return -ENODEV; -- cgit v1.2.3 From 311c2520de21cb2f44291ad3d984b42191126628 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 2 Dec 2020 08:38:50 +0100 Subject: firmware: xilinx: Properly align function parameter Fix parameters alignment reported by checkpatch --strict. Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/00ed9fcb94a6c22eff1fe8afdea46b2764a8687d.1606894725.git.michal.simek@xilinx.com --- include/linux/firmware/xlnx-zynqmp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 0e7e72650ed3..edc2977b26d9 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -456,7 +456,7 @@ static inline int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type) } static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, - const enum zynqmp_pm_reset_action assert_flag) + const enum zynqmp_pm_reset_action assert_flag) { return -ENODEV; } @@ -490,9 +490,9 @@ static inline int zynqmp_pm_release_node(const u32 node) } static inline int zynqmp_pm_set_requirement(const u32 node, - const u32 capabilities, - const u32 qos, - const enum zynqmp_pm_request_ack ack) + const u32 capabilities, + const u32 qos, + const enum zynqmp_pm_request_ack ack) { return -ENODEV; } -- cgit v1.2.3 From 5b4258f6721f41b092c63f6ee71be76e9616718b Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Wed, 2 Dec 2020 14:58:57 +0800 Subject: misc: rtsx: rts5249 support runtime PM rtsx_pcr: add callback functions to support runtime PM add delay_work to put device to D3 after idle over 10 sec rts5249: add extra init flow for rtd3 and set rtd3_en from config setting rtsx_pci_sdmmc: child device support autosuspend Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20201202065857.19412-1-ricky_wu@realtek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 745f5e73f99a..f895ccabbe29 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1174,6 +1174,7 @@ struct rtsx_pcr { struct delayed_work carddet_work; struct delayed_work idle_work; + struct delayed_work rtd3_work; spinlock_t lock; struct mutex pcr_mutex; @@ -1183,6 +1184,7 @@ struct rtsx_pcr { unsigned int cur_clock; bool remove_pci; bool msi_en; + bool is_runtime_suspended; #define EXTRA_CAPS_SD_SDR50 (1 << 0) #define EXTRA_CAPS_SD_SDR104 (1 << 1) -- cgit v1.2.3 From 8010622c86ca5bb44bc98492f5968726fc7c7a21 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 9 Dec 2020 16:26:39 +0100 Subject: USB: UAS: introduce a quirk to set no_write_same UAS does not share the pessimistic assumption storage is making that devices cannot deal with WRITE_SAME. A few devices supported by UAS, are reported to not deal well with WRITE_SAME. Those need a quirk. Add it to the device that needs it. Reported-by: David C. Partridge Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20201209152639.9195-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 4a19ac3f24d0..6b03fdd69d27 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -84,6 +84,8 @@ /* Cannot handle REPORT_LUNS */ \ US_FLAG(ALWAYS_SYNC, 0x20000000) \ /* lies about caching, so always sync */ \ + US_FLAG(NO_SAME, 0x40000000) \ + /* Cannot handle WRITE_SAME */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From c73ebb685fb6dfb513d394cbea64fb81ba3d994f Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Tue, 3 Nov 2020 10:54:37 +0800 Subject: io_uring: add timeout support for io_uring_enter() Now users who want to get woken when waiting for events should submit a timeout command first. It is not safe for applications that split SQ and CQ handling between two threads, such as mysql. Users should synchronize the two threads explicitly to protect SQ and that will impact the performance. This patch adds support for timeout to existing io_uring_enter(). To avoid overloading arguments, it introduces a new parameter structure which contains sigmask and timeout. I have tested the workloads with one thread submiting nop requests while the other reaping the cqe with timeout. It shows 1.8~2x faster when the iodepth is 16. Signed-off-by: Jiufei Xue Signed-off-by: Hao Xu [axboe: various cleanups/fixes, and name change to SIG_IS_DATA] Signed-off-by: Jens Axboe --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f2..8576e8bf92fe 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, u32 min_complete, u32 flags, - const sigset_t __user *sig, size_t sigsz); + const void __user *argp, size_t argsz); asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, void __user *arg, unsigned int nr_args); -- cgit v1.2.3 From 5a6338cce9f4133c478d3b10b300f96dd644379a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 Nov 2020 15:32:06 +0530 Subject: mailbox: arm_mhuv2: Add driver This adds driver for the ARM MHUv2 (Message Handling Unit) mailbox controller. This is based on the accepted DT bindings of the controller and supports combination of both transport protocols, i.e. doorbell and data-transfer. Transmitting and receiving data through the mailbox framework is done through struct arm_mhuv2_mbox_msg. Based on the initial work done by Morten Borup Petersen from ARM. Co-developed-by: Tushar Khandelwal Signed-off-by: Tushar Khandelwal Tested-by: Usama Arif Signed-off-by: Viresh Kumar Signed-off-by: Jassi Brar --- include/linux/mailbox/arm_mhuv2_message.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/mailbox/arm_mhuv2_message.h (limited to 'include/linux') diff --git a/include/linux/mailbox/arm_mhuv2_message.h b/include/linux/mailbox/arm_mhuv2_message.h new file mode 100644 index 000000000000..821b9d96daa4 --- /dev/null +++ b/include/linux/mailbox/arm_mhuv2_message.h @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM MHUv2 Mailbox Message + * + * Copyright (C) 2020 Arm Ltd. + * Copyright (C) 2020 Linaro Ltd. + */ + +#ifndef _LINUX_ARM_MHUV2_MESSAGE_H_ +#define _LINUX_ARM_MHUV2_MESSAGE_H_ + +#include + +/* Data structure for data-transfer protocol */ +struct arm_mhuv2_mbox_msg { + void *data; + size_t len; +}; + +#endif /* _LINUX_ARM_MHUV2_MESSAGE_H_ */ -- cgit v1.2.3 From d40c2d4ed62df64ce603c208bceff25245380157 Mon Sep 17 00:00:00 2001 From: Hsin-Hsiung Wang Date: Wed, 9 Dec 2020 18:33:43 -0800 Subject: spmi: Add driver shutdown support Add new shutdown() method. Use it in the standard driver model style. Link: https://lore.kernel.org/r/1603187810-30481-2-git-send-email-hsin-hsiung.wang@mediatek.com Signed-off-by: Hsin-Hsiung Wang Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20201210023344.2838141-4-sboyd@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/spmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spmi.h b/include/linux/spmi.h index 394a3f68bad5..729bcbf9f5ad 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -138,6 +138,7 @@ struct spmi_driver { struct device_driver driver; int (*probe)(struct spmi_device *sdev); void (*remove)(struct spmi_device *sdev); + void (*shutdown)(struct spmi_device *sdev); }; static inline struct spmi_driver *to_spmi_driver(struct device_driver *d) -- cgit v1.2.3 From 1c12c27086dcef853832a7cbebcb48bdac8104b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 25 Nov 2020 10:31:06 +0100 Subject: siox: Make remove callback return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core ignores the return value of the remove callback, so don't give siox drivers the chance to provide a value. All siox drivers only allocate devm-managed resources in .probe, so there is no .remove callback to fix. Tested-by: Thorsten Scherer Acked-by: Thorsten Scherer Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20201125093106.240643-3-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/siox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/siox.h b/include/linux/siox.h index da7225bf1877..6bfbda3f634c 100644 --- a/include/linux/siox.h +++ b/include/linux/siox.h @@ -36,7 +36,7 @@ bool siox_device_connected(struct siox_device *sdevice); struct siox_driver { int (*probe)(struct siox_device *sdevice); - int (*remove)(struct siox_device *sdevice); + void (*remove)(struct siox_device *sdevice); void (*shutdown)(struct siox_device *sdevice); /* -- cgit v1.2.3 From 0aec2da436623abe19b80b21dd9fc5ec9300a152 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 9 Dec 2020 22:36:38 +0200 Subject: driver core: platform: Introduce platform_get_mem_or_io() There are at least few existing users of the proposed API which retrieves either MEM or IO resource from platform device. Make it common to utilize in the existing and new users. Cc: Eric Auger Cc: Alex Williamson Cc: kvm@vger.kernel.org Cc: linux-usb@vger.kernel.org Cc: Peng Hao Cc: Arnd Bergmann Reviewed-by: Cornelia Huck Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201209203642.27648-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 77a2aada106d..ee6a9f10c2c7 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -52,6 +52,9 @@ extern struct device platform_bus; extern struct resource *platform_get_resource(struct platform_device *, unsigned int, unsigned int); +extern struct resource *platform_get_mem_or_io(struct platform_device *, + unsigned int); + extern struct device * platform_find_device_by_driver(struct device *start, const struct device_driver *drv); -- cgit v1.2.3 From 1f702603e7125a390b5cdf5ce00539781cfcc86a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:19 -0600 Subject: exec: Simplify unshare_files Now that exec no longer needs to return the unshared files to their previous value there is no reason to return displaced. Instead when unshare_fd creates a copy of the file table, call put_files_struct before returning from unshare_files. Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-2-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-2-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a32bf47c593e..f46a084b60b2 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -109,7 +109,7 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); -int unshare_files(struct files_struct **); +int unshare_files(void); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, -- cgit v1.2.3 From 950db38ff2c01b7aabbd7ab4a50b7992750fa63d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:20 -0600 Subject: exec: Remove reset_files_struct Now that exec no longer needs to restore the previous value of current->files on error there are no more callers of reset_files_struct so remove it. Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-3-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-3-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f46a084b60b2..7cc9885044d9 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -108,7 +108,6 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); -void reset_files_struct(struct files_struct *); int unshare_files(void); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); -- cgit v1.2.3 From bebf684bf330915e6c96313ad7db89a5480fc9c2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:24 -0600 Subject: file: Rename __fcheck_files to files_lookup_fd_raw The function fcheck despite it's comment is poorly named as it has no callers that only check it's return value. All of fcheck's callers use the returned file descriptor. The same is true for fcheck_files and __fcheck_files. A new less confusing name is needed. In addition the names of these functions are confusing as they do not report the kind of locks that are needed to be held when these functions are called making error prone to use them. To remedy this I am making the base functio name lookup_fd and will and prefixes and sufficies to indicate the rest of the context. Name the function (previously called __fcheck_files) that proceeds from a struct files_struct, looks up the struct file of a file descriptor, and requires it's callers to verify all of the appropriate locks are held files_lookup_fd_raw. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-7-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 7cc9885044d9..639933f37da9 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -80,7 +80,7 @@ struct dentry; /* * The caller must ensure that fd table isn't shared or hold rcu or file lock */ -static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd) +static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = rcu_dereference_raw(files->fdt); @@ -96,7 +96,7 @@ static inline struct file *fcheck_files(struct files_struct *files, unsigned int RCU_LOCKDEP_WARN(!rcu_read_lock_held() && !lockdep_is_held(&files->file_lock), "suspicious rcu_dereference_check() usage"); - return __fcheck_files(files, fd); + return files_lookup_fd_raw(files, fd); } /* -- cgit v1.2.3 From 120ce2b0cd52abe73e8b16c23461eb14df5a87d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:25 -0600 Subject: file: Factor files_lookup_fd_locked out of fcheck_files To make it easy to tell where files->file_lock protection is being used when looking up a file create files_lookup_fd_locked. Only allow this function to be called with the file_lock held. Update the callers of fcheck and fcheck_files that are called with the files->file_lock held to call files_lookup_fd_locked instead. Hopefully this makes it easier to quickly understand what is going on. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-8-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 639933f37da9..fda4b81dd735 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -91,6 +91,13 @@ static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsig return NULL; } +static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd) +{ + RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock), + "suspicious rcu_dereference_check() usage"); + return files_lookup_fd_raw(files, fd); +} + static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) { RCU_LOCKDEP_WARN(!rcu_read_lock_held() && -- cgit v1.2.3 From f36c2943274199cb8aef32ac96531ffb7c4b43d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:26 -0600 Subject: file: Replace fcheck_files with files_lookup_fd_rcu This change renames fcheck_files to files_lookup_fd_rcu. All of the remaining callers take the rcu_read_lock before calling this function so the _rcu suffix is appropriate. This change also tightens up the debug check to verify that all callers hold the rcu_read_lock. All callers that used to call files_check with the files->file_lock held have now been changed to call files_lookup_fd_locked. This change of name has helped remind me of which locks and which guarantees are in place helping me to catch bugs later in the patchset. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-9-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fda4b81dd735..fa8c402a7790 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -98,10 +98,9 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) +static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - !lockdep_is_held(&files->file_lock), + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious rcu_dereference_check() usage"); return files_lookup_fd_raw(files, fd); } @@ -109,7 +108,7 @@ static inline struct file *fcheck_files(struct files_struct *files, unsigned int /* * Check whether the specified fd has an open file. */ -#define fcheck(fd) fcheck_files(current->files, fd) +#define fcheck(fd) files_lookup_fd_rcu(current->files, fd) struct task_struct; -- cgit v1.2.3 From 460b4f812a9d473d4b39d87d37844f9fc30a9eb3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:27 -0600 Subject: file: Rename fcheck lookup_fd_rcu Also remove the confusing comment about checking if a fd exists. I could not find one instance in the entire kernel that still matches the description or the reason for the name fcheck. The need for better names became apparent in the last round of discussion of this set of changes[1]. [1] https://lkml.kernel.org/r/CAHk-=wj8BQbgJFLa+J0e=iT-1qpmCRTbPAJ8gd6MJQ=kbRPqyQ@mail.gmail.com Link: https://lkml.kernel.org/r/20201120231441.29911-10-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fa8c402a7790..2a4a8fed536e 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -105,10 +105,10 @@ static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsig return files_lookup_fd_raw(files, fd); } -/* - * Check whether the specified fd has an open file. - */ -#define fcheck(fd) files_lookup_fd_rcu(current->files, fd) +static inline struct file *lookup_fd_rcu(unsigned int fd) +{ + return files_lookup_fd_rcu(current->files, fd); +} struct task_struct; -- cgit v1.2.3 From 3a879fb38082125cc0d8aa89b70c7f3a7cdf584b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:28 -0600 Subject: file: Implement task_lookup_fd_rcu As a companion to lookup_fd_rcu implement task_lookup_fd_rcu for querying an arbitrary process about a specific file. Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200818103713.aw46m7vprsy4vlve@wittgenstein Link: https://lkml.kernel.org/r/20201120231441.29911-11-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 2a4a8fed536e..a0558ae9b40c 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -110,6 +110,8 @@ static inline struct file *lookup_fd_rcu(unsigned int fd) return files_lookup_fd_rcu(current->files, fd); } +struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); + struct task_struct; struct files_struct *get_files_struct(struct task_struct *); -- cgit v1.2.3 From e9a53aeb5e0a838f10fcea74235664e7ad5e6e1a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:31 -0600 Subject: file: Implement task_lookup_next_fd_rcu As a companion to fget_task and task_lookup_fd_rcu implement task_lookup_next_fd_rcu that will return the struct file for the first file descriptor number that is equal or greater than the fd argument value, or NULL if there is no such struct file. This allows file descriptors of foreign processes to be iterated through safely, without needed to increment the count on files_struct. Some concern[1] has been expressed that this function takes the task_lock for each iteration and thus for each file descriptor. This place where this function will be called in a commonly used code path is for listing /proc//fd. I did some small benchmarks and did not see any measurable performance differences. For ordinary users ls is likely to stat each of the directory entries and tid_fd_mode called from tid_fd_revalidae has always taken the task lock for each file descriptor. So this does not look like it will be a big change in practice. At some point is will probably be worth changing put_files_struct to free files_struct after an rcu grace period so that task_lock won't be needed at all. [1] https://lkml.kernel.org/r/20200817220425.9389-10-ebiederm@xmission.com v1: https://lkml.kernel.org/r/20200817220425.9389-9-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-14-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a0558ae9b40c..cf6c52dae3a1 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -111,6 +111,7 @@ static inline struct file *lookup_fd_rcu(unsigned int fd) } struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); +struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd); struct task_struct; -- cgit v1.2.3 From d74ba04d919ebe30bf47406819c18c6b50003d92 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:35 -0600 Subject: file: Merge __fd_install into fd_install The function __fd_install was added to support binder[1]. With binder fixed[2] there are no more users. As fd_install just calls __fd_install with "files=current->files", merge them together by transforming the files parameter into a local variable initialized to current->files. [1] f869e8a7f753 ("expose a low-level variant of fd_install() for binder") [2] 44d8047f1d87 ("binder: use standard functions to allocate fds") Acked-by: Christian Brauner v1:https://lkml.kernel.org/r/20200817220425.9389-14-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-18-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index cf6c52dae3a1..a5ec736d74a5 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -126,8 +126,6 @@ int iterate_fd(struct files_struct *, unsigned, extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); -extern void __fd_install(struct files_struct *files, - unsigned int fd, struct file *file); extern int __close_fd(struct files_struct *files, unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -- cgit v1.2.3 From aa384d10f3d06d4b85597ff5df41551262220e16 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:37 -0600 Subject: file: Merge __alloc_fd into alloc_fd The function __alloc_fd was added to support binder[1]. With binder fixed[2] there are no more users. As alloc_fd just calls __alloc_fd with "files=current->files", merge them together by transforming the files parameter into a local variable initialized to current->files. [1] dcfadfa4ec5a ("new helper: __alloc_fd()") [2] 44d8047f1d87 ("binder: use standard functions to allocate fds") Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-16-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-20-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a5ec736d74a5..dc476ae92f56 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -124,8 +124,6 @@ int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); -extern int __alloc_fd(struct files_struct *files, - unsigned start, unsigned end, unsigned flags); extern int __close_fd(struct files_struct *files, unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -- cgit v1.2.3 From 8760c909f54a82aaa6e76da19afe798a0c77c3c3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:38 -0600 Subject: file: Rename __close_fd to close_fd and remove the files parameter The function __close_fd was added to support binder[1]. Now that binder has been fixed to no longer need __close_fd[2] all calls to __close_fd pass current->files. Therefore transform the files parameter into a local variable initialized to current->files, and rename __close_fd to close_fd to reflect this change, and keep it in sync with the similar changes to __alloc_fd, and __fd_install. This removes the need for callers to care about the extra care that needs to be take if anything except current->files is passed, by limiting the callers to only operation on current->files. [1] 483ce1d4b8c3 ("take descriptor-related part of close() to file.c") [2] 44d8047f1d87 ("binder: use standard functions to allocate fds") Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-17-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-21-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 3 +-- include/linux/syscalls.h | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index dc476ae92f56..dad4a488ce60 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -124,8 +124,7 @@ int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); -extern int __close_fd(struct files_struct *files, - unsigned int fd); +extern int close_fd(unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern int __close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c12f2..9e055a0579f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1295,16 +1295,16 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length) return do_sys_ftruncate(fd, length, 1); } -extern int __close_fd(struct files_struct *files, unsigned int fd); +extern int close_fd(unsigned int fd); /* * In contrast to sys_close(), this stub does not check whether the syscall * should or should not be restarted, but returns the raw error codes from - * __close_fd(). + * close_fd(). */ static inline int ksys_close(unsigned int fd) { - return __close_fd(current->files, fd); + return close_fd(fd); } extern long do_sys_truncate(const char __user *pathname, loff_t length); -- cgit v1.2.3 From 1572bfdf21d4d50e51941498ffe0b56c2289f783 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:39 -0600 Subject: file: Replace ksys_close with close_fd Now that ksys_close is exactly identical to close_fd replace the one caller of ksys_close with close_fd. [1] https://lkml.kernel.org/r/20200818112020.GA17080@infradead.org Suggested-by: Christoph Hellwig Link: https://lkml.kernel.org/r/20201120231441.29911-22-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/syscalls.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9e055a0579f8..0f72f380db72 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1295,18 +1295,6 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length) return do_sys_ftruncate(fd, length, 1); } -extern int close_fd(unsigned int fd); - -/* - * In contrast to sys_close(), this stub does not check whether the syscall - * should or should not be restarted, but returns the raw error codes from - * close_fd(). - */ -static inline int ksys_close(unsigned int fd) -{ - return close_fd(fd); -} - extern long do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) -- cgit v1.2.3 From 9fe83c43e71cdb8e5b9520bcb98706a2b3c680c8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:40 -0600 Subject: file: Rename __close_fd_get_file close_fd_get_file The function close_fd_get_file is explicitly a variant of __close_fd[1]. Now that __close_fd has been renamed close_fd, rename close_fd_get_file to be consistent with close_fd. When __alloc_fd, __close_fd and __fd_install were introduced the double underscore indicated that the function took a struct files_struct parameter. The function __close_fd_get_file never has so the naming has always been inconsistent. This just cleans things up so there are not any lingering mentions or references __close_fd left in the code. [1] 80cd795630d6 ("binder: fix use-after-free due to ksys_close() during fdget()") Link: https://lkml.kernel.org/r/20201120231441.29911-23-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index dad4a488ce60..4ed3589f9294 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -126,7 +126,7 @@ int iterate_fd(struct files_struct *, unsigned, extern int close_fd(unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); -extern int __close_fd_get_file(unsigned int fd, struct file **res); +extern int close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp); -- cgit v1.2.3 From fa67bf885e5211c7dce9514ef2877212c0a5e09e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Nov 2020 17:14:41 -0600 Subject: file: Remove get_files_struct When discussing[1] exec and posix file locks it was realized that none of the callers of get_files_struct fundamentally needed to call get_files_struct, and that by switching them to helper functions instead it will both simplify their code and remove unnecessary increments of files_struct.count. Those unnecessary increments can result in exec unnecessarily unsharing files_struct which breaking posix locks, and it can result in fget_light having to fallback to fget reducing system performance. Now that get_files_struct has no more users and can not cause the problems for posix file locking and fget_light remove get_files_struct so that it does not gain any new users. [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com Suggested-by: Oleg Nesterov Acked-by: Christian Brauner v1: https://lkml.kernel.org/r/20200817220425.9389-13-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20201120231441.29911-24-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/fdtable.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 4ed3589f9294..d0e78174874a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -115,7 +115,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd) struct task_struct; -struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); int unshare_files(void); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; -- cgit v1.2.3 From f7cfd871ae0c5008d94b6f66834e7845caa93c15 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Dec 2020 14:12:00 -0600 Subject: exec: Transform exec_update_mutex into a rw_semaphore Recently syzbot reported[0] that there is a deadlock amongst the users of exec_update_mutex. The problematic lock ordering found by lockdep was: perf_event_open (exec_update_mutex -> ovl_i_mutex) chown (ovl_i_mutex -> sb_writes) sendfile (sb_writes -> p->lock) by reading from a proc file and writing to overlayfs proc_pid_syscall (p->lock -> exec_update_mutex) While looking at possible solutions it occured to me that all of the users and possible users involved only wanted to state of the given process to remain the same. They are all readers. The only writer is exec. There is no reason for readers to block on each other. So fix this deadlock by transforming exec_update_mutex into a rw_semaphore named exec_update_lock that only exec takes for writing. Cc: Jann Horn Cc: Vasiliy Kulikov Cc: Al Viro Cc: Bernd Edlinger Cc: Oleg Nesterov Cc: Christopher Yeoh Cc: Cyrill Gorcunov Cc: Sargun Dhillon Cc: Christian Brauner Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Fixes: eea9673250db ("exec: Add exec_update_mutex to replace cred_guard_mutex") [0] https://lkml.kernel.org/r/00000000000063640c05ade8e3de@google.com Reported-by: syzbot+db9cdf3dd1f64252c6ef@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/87ft4mbqen.fsf@x220.int.ebiederm.org Signed-off-by: Eric W. Biederman --- include/linux/sched/signal.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba..4b6a8234d7fc 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -228,12 +228,13 @@ struct signal_struct { * credential calculations * (notably. ptrace) * Deprecated do not use in new code. - * Use exec_update_mutex instead. - */ - struct mutex exec_update_mutex; /* Held while task_struct is being - * updated during exec, and may have - * inconsistent permissions. + * Use exec_update_lock instead. */ + struct rw_semaphore exec_update_lock; /* Held while task_struct is + * being updated during exec, + * and may have inconsistent + * permissions. + */ } __randomize_layout; /* -- cgit v1.2.3 From adf60a870e9130c7883ec2ab798484e05f24db39 Mon Sep 17 00:00:00 2001 From: Siddharth Gupta Date: Thu, 19 Nov 2020 13:05:32 -0800 Subject: remoteproc: core: Add ops to enable custom coredump functionality Each remoteproc might have different requirements for coredumps and might want to choose the type of dumps it wants to collect. This change allows remoteproc drivers to specify their own custom dump function to be executed in place of rproc_coredump. If the coredump op is not specified by the remoteproc driver it will be set to rproc_coredump by default. Reviewed-by: Bjorn Andersson Signed-off-by: Siddharth Gupta Link: https://lore.kernel.org/r/1605819935-10726-2-git-send-email-sidgup@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index e8ac041c64d9..121c55eff3d2 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -375,6 +375,7 @@ enum rsc_handling_status { * @get_boot_addr: get boot address to entry point specified in firmware * @panic: optional callback to react to system panic, core will delay * panic at least the returned number of milliseconds + * @coredump: collect firmware dump after the subsystem is shutdown */ struct rproc_ops { int (*prepare)(struct rproc *rproc); @@ -393,6 +394,7 @@ struct rproc_ops { int (*sanity_check)(struct rproc *rproc, const struct firmware *fw); u64 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw); unsigned long (*panic)(struct rproc *rproc); + void (*coredump)(struct rproc *rproc); }; /** -- cgit v1.2.3 From abc72b646066075acf9121a2a68aad39f550813d Mon Sep 17 00:00:00 2001 From: Siddharth Gupta Date: Thu, 19 Nov 2020 13:05:33 -0800 Subject: remoteproc: coredump: Add minidump functionality This change adds a new kind of core dump mechanism which instead of dumping entire program segments of the firmware, dumps sections of the remoteproc memory which are sufficient to allow debugging the firmware. This function thus uses section headers instead of program headers during creation of the core dump elf. Reviewed-by: Bjorn Andersson Co-developed-by: Rishabh Bhatnagar Signed-off-by: Rishabh Bhatnagar Signed-off-by: Siddharth Gupta Link: https://lore.kernel.org/r/1605819935-10726-3-git-send-email-sidgup@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 121c55eff3d2..f28ee75d1005 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -657,6 +657,7 @@ int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); +void rproc_coredump_using_sections(struct rproc *rproc); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, dma_addr_t da, size_t size, -- cgit v1.2.3 From e7708f5b10e205d6291bb495e645a03553b9768b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Sun, 29 Nov 2020 23:07:39 +0000 Subject: PCI: Unify ECAM constants in native PCI Express drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ECAM-related constants to provide a set of standard constants defining memory address shift values to the byte-level address that can be used to access the PCI Express Configuration Space, and then move native PCI Express controller drivers to use the newly introduced definitions retiring driver-specific ones. Refactor pci_ecam_map_bus() function to use newly added constants so that limits to the bus, device function and offset (now limited to 4K as per the specification) are in place to prevent the defective or malicious caller from supplying incorrect configuration offset and thus targeting the wrong device when accessing extended configuration space. This refactor also allows for the ".bus_shift" initialisers to be dropped when the user is not using a custom value as a default value will be used as per the PCI Express Specification. Thanks to Qian Cai , Michael Walle , and Vladimir Oltean for reporting a pci_ecam_create() issue with .bus_shift and to Vladimir for proposing the fix. [bhelgaas: incorporate Vladimir's fix, update commit log] Suggested-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20201129230743.3006978-2-kw@linux.com Tested-by: Michael Walle Signed-off-by: Krzysztof Wilczyński Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Jon Derrick Reviewed-by: Bjorn Helgaas --- include/linux/pci-ecam.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 033ce74f02e8..65d3d83015c3 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -9,6 +9,33 @@ #include #include +/* + * Memory address shift values for the byte-level address that + * can be used when accessing the PCI Express Configuration Space. + */ + +/* + * Enhanced Configuration Access Mechanism (ECAM) + * + * See PCI Express Base Specification, Revision 5.0, Version 1.0, + * Section 7.2.2, Table 7-1, p. 677. + */ +#define PCIE_ECAM_BUS_SHIFT 20 /* Bus number */ +#define PCIE_ECAM_DEVFN_SHIFT 12 /* Device and Function number */ + +#define PCIE_ECAM_BUS_MASK 0xff +#define PCIE_ECAM_DEVFN_MASK 0xff +#define PCIE_ECAM_REG_MASK 0xfff /* Limit offset to a maximum of 4K */ + +#define PCIE_ECAM_BUS(x) (((x) & PCIE_ECAM_BUS_MASK) << PCIE_ECAM_BUS_SHIFT) +#define PCIE_ECAM_DEVFN(x) (((x) & PCIE_ECAM_DEVFN_MASK) << PCIE_ECAM_DEVFN_SHIFT) +#define PCIE_ECAM_REG(x) ((x) & PCIE_ECAM_REG_MASK) + +#define PCIE_ECAM_OFFSET(bus, devfn, where) \ + (PCIE_ECAM_BUS(bus) | \ + PCIE_ECAM_DEVFN(devfn) | \ + PCIE_ECAM_REG(where)) + /* * struct to hold pci ops and bus shift of the config window * for a PCI controller. -- cgit v1.2.3 From 9994bb3f36e3d181d9f0a078609038080cfd7a3d Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:13 +0200 Subject: mtd: nand: ecc-bch: Create the software BCH engine Let's continue introducing the generic ECC engine abstraction in the NAND subsystem by instantiating a first ECC engine: the software BCH one. While at it, make a very tidy ecc_sw_bch_init() function and move all the sanity checks and user input management in nand_ecc_sw_bch_init_ctx(). This second helper will be called from the raw RAND core. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-10-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-bch.h | 16 ++++++++-------- include/linux/mtd/nand.h | 9 +++++++++ 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-bch.h b/include/linux/mtd/nand-ecc-sw-bch.h index ce005528e55f..22c92073b3dd 100644 --- a/include/linux/mtd/nand-ecc-sw-bch.h +++ b/include/linux/mtd/nand-ecc-sw-bch.h @@ -13,8 +13,8 @@ /** * struct nand_ecc_sw_bch_conf - private software BCH ECC engine structure - * @reqooblen: Save the actual user OOB length requested before overwriting it - * @spare_oobbuf: Spare OOB buffer if none is provided + * @req_ctx: Save request context and tweak the original request to fit the + * engine needs * @code_size: Number of bytes needed to store a code (one code per step) * @nsteps: Number of steps * @calc_buf: Buffer to use when calculating ECC bytes @@ -24,8 +24,7 @@ * @eccmask: XOR ecc mask, allows erased pages to be decoded as valid */ struct nand_ecc_sw_bch_conf { - unsigned int reqooblen; - void *spare_oobbuf; + struct nand_ecc_req_tweak_ctx req_ctx; unsigned int code_size; unsigned int nsteps; u8 *calc_buf; @@ -41,8 +40,9 @@ int nand_ecc_sw_bch_calculate(struct nand_device *nand, const unsigned char *buf, unsigned char *code); int nand_ecc_sw_bch_correct(struct nand_device *nand, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); -int nand_ecc_sw_bch_init(struct nand_device *nand); -void nand_ecc_sw_bch_cleanup(struct nand_device *nand); +int nand_ecc_sw_bch_init_ctx(struct nand_device *nand); +void nand_ecc_sw_bch_cleanup_ctx(struct nand_device *nand); +struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void); #else /* !CONFIG_MTD_NAND_ECC_SW_BCH */ @@ -61,12 +61,12 @@ static inline int nand_ecc_sw_bch_correct(struct nand_device *nand, return -ENOTSUPP; } -static inline int nand_ecc_sw_bch_init(struct nand_device *nand) +static inline int nand_ecc_sw_bch_init_ctx(struct nand_device *nand) { return -ENOTSUPP; } -static inline void nand_ecc_sw_bch_cleanup(struct nand_device *nand) {} +static inline void nand_ecc_sw_bch_cleanup_ctx(struct nand_device *nand) {} #endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 36e4fe08d0ea..df8548187713 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,15 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) +struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void); +#else +static inline struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void) +{ + return NULL; +} +#endif /* CONFIG_MTD_NAND_ECC_SW_BCH */ + /** * struct nand_ecc_req_tweak_ctx - Help for automatically tweaking requests * @orig_req: Pointer to the original IO request -- cgit v1.2.3 From cbd87780bed580b585d2992f29077ac44950cb66 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:14 +0200 Subject: mtd: rawnand: Get rid of chip->ecc.priv nand_ecc_ctrl embeds a private pointer which only has a meaning in the sunxi driver. This structure will soon be deprecated, but as this field is actually not needed, let's just drop it. Cc: Maxime Ripard Cc: Chen-Yu Tsai Signed-off-by: Miquel Raynal Acked-by: Maxime Ripard Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-11-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 23623beaad1d..15743c2c2cab 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -302,7 +302,6 @@ static const struct nand_ecc_caps __name = { \ * @prepad: padding information for syndrome based ECC generators * @postpad: padding information for syndrome based ECC generators * @options: ECC specific options (see NAND_ECC_XXX flags defined above) - * @priv: pointer to private ECC control data * @calc_buf: buffer for calculated ECC, size is oobsize. * @code_buf: buffer for ECC read from flash, size is oobsize. * @hwctl: function to control hardware ECC generator. Must only @@ -355,7 +354,6 @@ struct nand_ecc_ctrl { int prepad; int postpad; unsigned int options; - void *priv; u8 *calc_buf; u8 *code_buf; void (*hwctl)(struct nand_chip *chip, int mode); -- cgit v1.2.3 From e5acf9c862974041f7b2f581d1a40ccd29769add Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:15 +0200 Subject: mtd: nand: ecc-hamming: Move Hamming code to the generic NAND layer Hamming ECC code might be later re-used by the SPI NAND layer. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-12-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 39 +++++++++++++++++++++++++++++++++ include/linux/mtd/nand_ecc.h | 39 --------------------------------- include/linux/mtd/sharpsl.h | 2 +- 3 files changed, 40 insertions(+), 40 deletions(-) create mode 100644 include/linux/mtd/nand-ecc-sw-hamming.h delete mode 100644 include/linux/mtd/nand_ecc.h (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h new file mode 100644 index 000000000000..30a0cfa50eed --- /dev/null +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2000-2010 Steven J. Hill + * David Woodhouse + * Thomas Gleixner + * + * This file is the header for the ECC algorithm. + */ + +#ifndef __MTD_NAND_ECC_SW_HAMMING_H__ +#define __MTD_NAND_ECC_SW_HAMMING_H__ + +struct nand_chip; + +/* + * Calculate 3 byte ECC code for eccsize byte block + */ +void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize, + u_char *ecc_code, bool sm_order); + +/* + * Calculate 3 byte ECC code for 256/512 byte block + */ +int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat, + u_char *ecc_code); + +/* + * Detect and correct a 1 bit error for eccsize byte block + */ +int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc, + unsigned int eccsize, bool sm_order); + +/* + * Detect and correct a 1 bit error for 256/512 byte block + */ +int nand_correct_data(struct nand_chip *chip, u_char *dat, u_char *read_ecc, + u_char *calc_ecc); + +#endif /* __MTD_NAND_ECC_SW_HAMMING_H__ */ diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h deleted file mode 100644 index d423916b94f0..000000000000 --- a/include/linux/mtd/nand_ecc.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2000-2010 Steven J. Hill - * David Woodhouse - * Thomas Gleixner - * - * This file is the header for the ECC algorithm. - */ - -#ifndef __MTD_NAND_ECC_H__ -#define __MTD_NAND_ECC_H__ - -struct nand_chip; - -/* - * Calculate 3 byte ECC code for eccsize byte block - */ -void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize, - u_char *ecc_code, bool sm_order); - -/* - * Calculate 3 byte ECC code for 256/512 byte block - */ -int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct a 1 bit error for eccsize byte block - */ -int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc, - unsigned int eccsize, bool sm_order); - -/* - * Detect and correct a 1 bit error for 256/512 byte block - */ -int nand_correct_data(struct nand_chip *chip, u_char *dat, u_char *read_ecc, - u_char *calc_ecc); - -#endif /* __MTD_NAND_ECC_H__ */ diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h index d2c3cf29e0d1..3762a90077d6 100644 --- a/include/linux/mtd/sharpsl.h +++ b/include/linux/mtd/sharpsl.h @@ -9,7 +9,7 @@ #define _MTD_SHARPSL_H #include -#include +#include #include struct sharpsl_nand_platform_data { -- cgit v1.2.3 From 2dbe0192efa02f2f405e193f4de84bf07c7f91fb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:16 +0200 Subject: mtd: nand: ecc-hamming: Clarify the driver descriptions The include file pretends being the header for "ECC algorithm", while it is just the header for the Hamming implementation. Make this clear by rewording the sentence. Do the same with the module description. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-13-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 30a0cfa50eed..85e9a929b5f9 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -4,7 +4,7 @@ * David Woodhouse * Thomas Gleixner * - * This file is the header for the ECC algorithm. + * This file is the header for the NAND Hamming ECC implementation. */ #ifndef __MTD_NAND_ECC_SW_HAMMING_H__ -- cgit v1.2.3 From 90ccf0a0192f7fa06e52de80cb528c5217e3e297 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:19 +0200 Subject: mtd: nand: ecc-hamming: Rename the exported functions Prefix by ecc_sw_hamming_ the functions which should be internal only but are exported for "raw" operations. Prefix by nand_ecc_sw_hamming_ the other functions which will be used in the context of the declaration of an Hamming proper ECC engine object. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-16-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 36 +++++++++++---------------------- include/linux/mtd/rawnand.h | 7 +++++++ 2 files changed, 19 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 85e9a929b5f9..84a123bf45f3 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -10,30 +10,18 @@ #ifndef __MTD_NAND_ECC_SW_HAMMING_H__ #define __MTD_NAND_ECC_SW_HAMMING_H__ -struct nand_chip; +#include -/* - * Calculate 3 byte ECC code for eccsize byte block - */ -void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize, - u_char *ecc_code, bool sm_order); - -/* - * Calculate 3 byte ECC code for 256/512 byte block - */ -int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat, - u_char *ecc_code); - -/* - * Detect and correct a 1 bit error for eccsize byte block - */ -int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc, - unsigned int eccsize, bool sm_order); - -/* - * Detect and correct a 1 bit error for 256/512 byte block - */ -int nand_correct_data(struct nand_chip *chip, u_char *dat, u_char *read_ecc, - u_char *calc_ecc); +int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, + unsigned char *code, bool sm_order); +int nand_ecc_sw_hamming_calculate(struct nand_device *nand, + const unsigned char *buf, + unsigned char *code); +int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc, + unsigned char *calc_ecc, unsigned int step_size, + bool sm_order); +int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc); #endif /* __MTD_NAND_ECC_SW_HAMMING_H__ */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 15743c2c2cab..685d24557e95 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1301,6 +1301,13 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } +int rawnand_sw_hamming_calculate(struct nand_chip *chip, + const unsigned char *buf, + unsigned char *code); +int rawnand_sw_hamming_correct(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc); int rawnand_sw_bch_init(struct nand_chip *chip); int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); -- cgit v1.2.3 From 19b2ce184b9f404d6620adf667a9019e6abcae51 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:20 +0200 Subject: mtd: nand: ecc-hamming: Stop using raw NAND structures This code is meant to be reused by the SPI-NAND core. Now that the driver has been cleaned and reorganized, use a generic ECC engine object to store the driver's data instead of accessing members of the nand_chip structure. This means adding proper init/cleanup helpers. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-17-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 20 ++++++++++++++++++++ include/linux/mtd/rawnand.h | 2 ++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 84a123bf45f3..9d4b4d623741 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -12,6 +12,26 @@ #include +/** + * struct nand_ecc_sw_hamming_conf - private software Hamming ECC engine structure + * @reqooblen: Save the actual user OOB length requested before overwriting it + * @spare_oobbuf: Spare OOB buffer if none is provided + * @code_size: Number of bytes needed to store a code (one code per step) + * @nsteps: Number of steps + * @calc_buf: Buffer to use when calculating ECC bytes + * @code_buf: Buffer to use when reading (raw) ECC bytes from the chip + * @sm_order: Smart Media special ordering + */ +struct nand_ecc_sw_hamming_conf { + unsigned int reqooblen; + void *spare_oobbuf; + unsigned int code_size; + unsigned int nsteps; + u8 *calc_buf; + u8 *code_buf; + unsigned int sm_order; +}; + int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order); int nand_ecc_sw_hamming_calculate(struct nand_device *nand, diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 685d24557e95..0a90a8792d18 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1301,6 +1301,7 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } +int rawnand_sw_hamming_init(struct nand_chip *chip); int rawnand_sw_hamming_calculate(struct nand_chip *chip, const unsigned char *buf, unsigned char *code); @@ -1308,6 +1309,7 @@ int rawnand_sw_hamming_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); +void rawnand_sw_hamming_cleanup(struct nand_chip *chip); int rawnand_sw_bch_init(struct nand_chip *chip); int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); -- cgit v1.2.3 From eb08376a5dd943cf2a7360f236fe20bbd709fa95 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:21 +0200 Subject: mtd: nand: ecc-hamming: Remove useless includes Most of the includes are simply useless, drop them. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-18-miquel.raynal@bootlin.com --- include/linux/mtd/sharpsl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h index 3762a90077d6..231bd1c3f408 100644 --- a/include/linux/mtd/sharpsl.h +++ b/include/linux/mtd/sharpsl.h @@ -9,7 +9,6 @@ #define _MTD_SHARPSL_H #include -#include #include struct sharpsl_nand_platform_data { -- cgit v1.2.3 From 5180a62c12497aa491a7c79c062a9e3a884c9762 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:22 +0200 Subject: mtd: nand: ecc-hamming: Let the software Hamming ECC engine be unselected There is no reason to always embed the software Hamming ECC engine implementation. By default it is (with raw NAND), but we can let the user decide. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-19-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 9d4b4d623741..5a39e96c3546 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -32,6 +32,8 @@ struct nand_ecc_sw_hamming_conf { unsigned int sm_order; }; +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) + int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order); int nand_ecc_sw_hamming_calculate(struct nand_device *nand, @@ -44,4 +46,38 @@ int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); +#else /* !CONFIG_MTD_NAND_ECC_SW_HAMMING */ + +static inline int ecc_sw_hamming_calculate(const unsigned char *buf, + unsigned int step_size, + unsigned char *code, bool sm_order) +{ + return -ENOTSUPP; +} + +static inline int nand_ecc_sw_hamming_calculate(struct nand_device *nand, + const unsigned char *buf, + unsigned char *code) +{ + return -ENOTSUPP; +} + +static inline int ecc_sw_hamming_correct(unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc, + unsigned int step_size, bool sm_order) +{ + return -ENOTSUPP; +} + +static inline int nand_ecc_sw_hamming_correct(struct nand_device *nand, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return -ENOTSUPP; +} + +#endif /* CONFIG_MTD_NAND_ECC_SW_HAMMING */ + #endif /* __MTD_NAND_ECC_SW_HAMMING_H__ */ -- cgit v1.2.3 From 35fe1b98a0082ad3f576bcc420c74dab435da307 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:23 +0200 Subject: mtd: nand: ecc-hamming: Create the software Hamming engine Let's continue introducing the generic ECC engine abstraction in the NAND subsystem by instantiating a second ECC engine: software Hamming. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-20-miquel.raynal@bootlin.com --- include/linux/mtd/nand-ecc-sw-hamming.h | 16 ++++++++++++---- include/linux/mtd/nand.h | 9 +++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-ecc-sw-hamming.h b/include/linux/mtd/nand-ecc-sw-hamming.h index 5a39e96c3546..9f9073d86ff3 100644 --- a/include/linux/mtd/nand-ecc-sw-hamming.h +++ b/include/linux/mtd/nand-ecc-sw-hamming.h @@ -14,8 +14,8 @@ /** * struct nand_ecc_sw_hamming_conf - private software Hamming ECC engine structure - * @reqooblen: Save the actual user OOB length requested before overwriting it - * @spare_oobbuf: Spare OOB buffer if none is provided + * @req_ctx: Save request context and tweak the original request to fit the + * engine needs * @code_size: Number of bytes needed to store a code (one code per step) * @nsteps: Number of steps * @calc_buf: Buffer to use when calculating ECC bytes @@ -23,8 +23,7 @@ * @sm_order: Smart Media special ordering */ struct nand_ecc_sw_hamming_conf { - unsigned int reqooblen; - void *spare_oobbuf; + struct nand_ecc_req_tweak_ctx req_ctx; unsigned int code_size; unsigned int nsteps; u8 *calc_buf; @@ -34,6 +33,8 @@ struct nand_ecc_sw_hamming_conf { #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) +int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand); +void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand); int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order); int nand_ecc_sw_hamming_calculate(struct nand_device *nand, @@ -48,6 +49,13 @@ int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf, #else /* !CONFIG_MTD_NAND_ECC_SW_HAMMING */ +static inline int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand) +{ + return -ENOTSUPP; +} + +static inline void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand) {} + static inline int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size, unsigned char *code, bool sm_order) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index df8548187713..3616fa27eaa1 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,15 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +#if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) +struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); +#else +static inline struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void) +{ + return NULL; +} +#endif /* CONFIG_MTD_NAND_ECC_SW_HAMMING */ + #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_BCH) struct nand_ecc_engine *nand_ecc_sw_bch_get_engine(void); #else -- cgit v1.2.3 From 53fbdeeb57a0168a88547e22f8d433810c531169 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 01:01:24 +0200 Subject: mtd: nand: Let software ECC engines be retrieved from the NAND core Before making use of the ECC engines, we must retrieve them. Add the boilerplate for the ones already available: software engines (Hamming and BCH). Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200929230124.31491-21-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3616fa27eaa1..f78f61c9a9ee 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -277,6 +277,7 @@ int nand_ecc_prepare_io_req(struct nand_device *nand, int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); +struct nand_ecc_engine *nand_ecc_get_sw_engine(struct nand_device *nand); #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); -- cgit v1.2.3 From 945845b54c9cf61809d1963492bb728ce8937964 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 17:41:07 +0200 Subject: mtd: spinand: Instantiate a SPI-NAND on-die ECC engine Make use of the existing functions taken from the SPI-NAND core to instantiate an on-die ECC engine specific to the SPI-NAND core. The next step will be to tweak the core to use this object instead of calling the helpers directly. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200930154109.3922-4-miquel.raynal@bootlin.com --- include/linux/mtd/spinand.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 7b78c4ba9b3e..6bb92f26833e 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -286,6 +286,15 @@ struct spinand_ecc_info { #define SPINAND_HAS_QE_BIT BIT(0) #define SPINAND_HAS_CR_FEAT_BIT BIT(1) +/** + * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure + * @status: status of the last wait operation that will be used in case + * ->get_status() is not populated by the spinand device. + */ +struct spinand_ondie_ecc_conf { + u8 status; +}; + /** * struct spinand_info - Structure used to describe SPI NAND chips * @model: model name -- cgit v1.2.3 From da429b9615803b6f19e5734c4c4d99136e1e3bfd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 30 Sep 2020 17:41:08 +0200 Subject: mtd: nand: Let on-die ECC engines be retrieved from the NAND core Before making use of the ECC engines, we must retrieve them. Add the necessary boilerplate. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200930154109.3922-5-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f78f61c9a9ee..6c6f91c03c42 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -278,6 +278,7 @@ int nand_ecc_finish_io_req(struct nand_device *nand, struct nand_page_io_req *req); bool nand_ecc_is_strong_enough(struct nand_device *nand); struct nand_ecc_engine *nand_ecc_get_sw_engine(struct nand_device *nand); +struct nand_ecc_engine *nand_ecc_get_on_die_hw_engine(struct nand_device *nand); #if IS_ENABLED(CONFIG_MTD_NAND_ECC_SW_HAMMING) struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void); -- cgit v1.2.3 From 6b0c3b84156125e029956e46d2b44e72f513a9fa Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 1 Oct 2020 12:20:09 +0200 Subject: mtd: nand: Add helpers to manage ECC engines and configurations Add the logic in the NAND core to find the right ECC engine depending on the NAND chip requirements and the user desires. Right now, the choice may be made between (more will come): * software Hamming * software BCH * on-die (SPI-NAND devices only) Once the ECC engine has been found, the ECC engine must be configured. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201001102014.20100-2-miquel.raynal@bootlin.com --- include/linux/mtd/nand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 6c6f91c03c42..414f8a4d2853 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -936,6 +936,10 @@ bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos); int nanddev_erase(struct nand_device *nand, const struct nand_pos *pos); int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); +/* ECC related functions */ +int nanddev_ecc_engine_init(struct nand_device *nand); +void nanddev_ecc_engine_cleanup(struct nand_device *nand); + /* BBT related functions */ enum nand_bbt_block_status { NAND_BBT_BLOCK_STATUS_UNKNOWN, -- cgit v1.2.3 From 7998d89875177a5fac9f963e230dbb828c218cb9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:04 +0200 Subject: mtd: rawnand: fix a kernel-doc markup Some identifiers have different names between their prototypes and the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/9ed47a57d12c40e73a9b01612ee119d39baa6236.1603469755.git.mchehab+huawei@kernel.org --- include/linux/mtd/rawnand.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 0a90a8792d18..6b3240e44310 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1284,7 +1284,8 @@ static inline bool nand_is_slc(struct nand_chip *chip) } /** - * Check if the opcode's address should be sent only on the lower 8 bits + * nand_opcode_8bits - Check if the opcode's address should be sent only on the + * lower 8 bits * @command: opcode to check */ static inline int nand_opcode_8bits(unsigned int command) -- cgit v1.2.3 From 0f6b791955a6365b5ebe8b6a5b01de69a47ee92e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 10 Nov 2020 09:19:08 -0300 Subject: mtd: rawnand: mxc: Remove platform data support i.MX is a devicetree-only platform now and the existing platform data support in this driver was only useful for old non-devicetree platforms. Get rid of the platform data support since it is no longer used. Signed-off-by: Fabio Estevam Reviewed-by: Sascha Hauer Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201110121908.19400-1-festevam@gmail.com --- include/linux/platform_data/mtd-mxc_nand.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/linux/platform_data/mtd-mxc_nand.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-mxc_nand.h b/include/linux/platform_data/mtd-mxc_nand.h deleted file mode 100644 index d1230030c6db..000000000000 --- a/include/linux/platform_data/mtd-mxc_nand.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved. - * Copyright 2008 Sascha Hauer, kernel@pengutronix.de - */ - -#ifndef __ASM_ARCH_NAND_H -#define __ASM_ARCH_NAND_H - -#include - -struct mxc_nand_platform_data { - unsigned int width; /* data bus width in bytes */ - unsigned int hw_ecc:1; /* 0 if suppress hardware ECC */ - unsigned int flash_bbt:1; /* set to 1 to use a flash based bbt */ - struct mtd_partition *parts; /* partition table */ - int nr_parts; /* size of parts */ -}; -#endif /* __ASM_ARCH_NAND_H */ -- cgit v1.2.3 From bdfae1c9a913930eae5ea506733aa7c285e12a06 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 9 Dec 2020 09:44:44 +0800 Subject: vfio/type1: Add vfio_group_iommu_domain() Add the API for getting the domain from a vfio group. This could be used by the physical device drivers which rely on the vfio/mdev framework for mediated device user level access. The typical use case like below: unsigned int pasid; struct vfio_group *vfio_group; struct iommu_domain *iommu_domain; struct device *dev = mdev_dev(mdev); struct device *iommu_device = mdev_get_iommu_device(dev); if (!iommu_device || !iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX)) return -EINVAL; vfio_group = vfio_group_get_external_user_from_dev(dev); if (IS_ERR_OR_NULL(vfio_group)) return -EFAULT; iommu_domain = vfio_group_iommu_domain(vfio_group); if (IS_ERR_OR_NULL(iommu_domain)) { vfio_group_put_external_user(vfio_group); return -EFAULT; } pasid = iommu_aux_get_pasid(iommu_domain, iommu_device); if (pasid < 0) { vfio_group_put_external_user(vfio_group); return -EFAULT; } /* Program device context with pasid value. */ ... Signed-off-by: Lu Baolu Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 38d3c6a8dc7e..f45940b38a02 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -90,6 +90,8 @@ struct vfio_iommu_driver_ops { struct notifier_block *nb); int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, void *data, size_t count, bool write); + struct iommu_domain *(*group_iommu_domain)(void *iommu_data, + struct iommu_group *group); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -126,6 +128,8 @@ extern int vfio_group_unpin_pages(struct vfio_group *group, extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, void *data, size_t len, bool write); +extern struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group); + /* each type has independent events */ enum vfio_notify_type { VFIO_IOMMU_NOTIFY = 0, -- cgit v1.2.3 From 88149082bb8ef31b289673669e080ec6a00c2e59 Mon Sep 17 00:00:00 2001 From: Hao Li Date: Tue, 8 Dec 2020 10:08:43 +0800 Subject: fs: Handle I_DONTCACHE in iput_final() instead of generic_drop_inode() If generic_drop_inode() returns true, it means iput_final() can evict this inode regardless of whether it is dirty or not. If we check I_DONTCACHE in generic_drop_inode(), any inode with this bit set will be evicted unconditionally. This is not the desired behavior because I_DONTCACHE only means the inode shouldn't be cached on the LRU list. As for whether we need to evict this inode, this is what generic_drop_inode() should do. This patch corrects the usage of I_DONTCACHE. This patch was proposed in [1]. [1]: https://lore.kernel.org/linux-fsdevel/20200831003407.GE12096@dread.disaster.area/ Fixes: dae2f8ed7992 ("fs: Lift XFS_IDONTCACHE to the VFS layer") Signed-off-by: Hao Li Reviewed-by: Dave Chinner Reviewed-by: Ira Weiny Signed-off-by: Al Viro --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e..8bde32cf9711 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2878,8 +2878,7 @@ extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || inode_unhashed(inode) || - (inode->i_state & I_DONTCACHE); + return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); -- cgit v1.2.3 From c9e6189fb03123a7dfb93589280347b46f30b161 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:18 +0100 Subject: ntp: Make the RTC synchronization more reliable Miroslav reported that the periodic RTC synchronization in the NTP code fails more often than not to hit the specified update window. The reason is that the code uses delayed_work to schedule the update which needs to be in thread context as the underlying RTC might be connected via a slow bus, e.g. I2C. In the update function it verifies whether the current time is correct vs. the requirements of the underlying RTC. But delayed_work is using the timer wheel for scheduling which is inaccurate by design. Depending on the distance to the expiry the wheel gets less granular to allow batching and to avoid the cascading of the original timer wheel. See 500462a9de65 ("timers: Switch to a non-cascading wheel") and the code for further details. The code already deals with this by splitting the 660 seconds period into a long 659 seconds timer and then retrying with a smaller delta. But looking at the actual granularities of the timer wheel (which depend on the HZ configuration) the 659 seconds timer ends up in an outer wheel level and is affected by a worst case granularity of: HZ Granularity 1000 32s 250 16s 100 40s So the initial timer can be already off by max 12.5% which is not a big issue as the period of the sync is defined as ~11 minutes. The fine grained second attempt schedules to the desired update point with a timer expiring less than a second from now. Depending on the actual delta and the HZ setting even the second attempt can end up in outer wheel levels which have a large enough granularity to make the correctness check fail. As this is a fundamental property of the timer wheel there is no way to make this more accurate short of iterating in one jiffies steps towards the update point. Switch it to an hrtimer instead which schedules the actual update work. The hrtimer will expire precisely (max 1 jiffie delay when high resolution timers are not available). The actual scheduling delay of the work is the same as before. The update is triggered from do_adjtimex() which is a bit racy but not much more racy than it was before: if (ntp_synced()) queue_delayed_work(system_power_efficient_wq, &sync_work, 0); which is racy when the work is currently executed and has not managed to reschedule itself. This becomes now: if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer)) queue_work(system_power_efficient_wq, &sync_work, 0); which is racy when the hrtimer has expired and the work is currently executed and has not yet managed to rearm the hrtimer. Not a big problem as it just schedules work for nothing. The new implementation has a safe guard in place to catch the case where the hrtimer is queued on entry to the work function and avoids an extra update attempt of the RTC that way. Reported-by: Miroslav Lichvar Signed-off-by: Thomas Gleixner Tested-by: Miroslav Lichvar Reviewed-by: Jason Gunthorpe Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220542.062910520@linutronix.de --- include/linux/timex.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index ce0859763670..9c2e54faf9b7 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -157,7 +157,6 @@ extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); -void ntp_notify_cmos_timer(void); /* The clock frequency of the i8253/i8254 PIT */ #define PIT_TICK_RATE 1193182ul -- cgit v1.2.3 From 33e62e832384c8cb523044e0e9d99d7133f98e93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:19 +0100 Subject: ntp, rtc: Move rtc_set_ntp_time() to ntp code rtc_set_ntp_time() is not really RTC functionality as the code is just a user of RTC. Move it into the NTP code which allows further cleanups. Requested-by: Alexandre Belloni Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220542.166871172@linutronix.de --- include/linux/rtc.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 22d1575e4991..ff62680b48ca 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -165,7 +165,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); -extern int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); @@ -205,39 +204,6 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } -/* Determine if we can call to driver to set the time. Drivers can only be - * called to set a second aligned time value, and the field set_offset_nsec - * specifies how far away from the second aligned time to call the driver. - * - * This also computes 'to_set' which is the time we are trying to set, and has - * a zero in tv_nsecs, such that: - * to_set - set_delay_nsec == now +/- FUZZ - * - */ -static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec, - struct timespec64 *to_set, - const struct timespec64 *now) -{ - /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */ - const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5; - struct timespec64 delay = {.tv_sec = 0, - .tv_nsec = set_offset_nsec}; - - *to_set = timespec64_add(*now, delay); - - if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) { - to_set->tv_nsec = 0; - return true; - } - - if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) { - to_set->tv_sec++; - to_set->tv_nsec = 0; - return true; - } - return false; -} - #define rtc_register_device(device) \ __rtc_register_device(THIS_MODULE, device) -- cgit v1.2.3 From 69eca258c85000564577642ba28335eb4e1df8f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:46:20 +0100 Subject: ntp: Make the RTC sync offset less obscure The current RTC set_offset_nsec value is not really intuitive to understand. tsched twrite(t2.tv_sec - 1) t2 (seconds increment) The offset is calculated from twrite based on the assumption that t2 - twrite == 1s. That means for the MC146818 RTC the offset needs to be negative so that the write happens 500ms before t2. It's easier to understand when the whole calculation is based on t2. That avoids negative offsets and the meaning is obvious: t2 - twrite: The time defined by the chip when seconds increment after the write. twrite - tsched: The time for the transport to the point where the chip is updated. ==> set_offset_nsec = t2 - tsched ttransport = twrite - tsched tRTCinc = t2 - twrite ==> set_offset_nsec = ttransport + tRTCinc tRTCinc is a chip property and can be obtained from the data sheet. ttransport depends on how the RTC is connected. It is close to 0 for directly accessible RTCs. For RTCs behind a slow bus, e.g. i2c, it's the time required to send the update over the bus. This can be estimated or even calibrated, but that's a different problem. Adjust the implementation and update comments accordingly. Signed-off-by: Thomas Gleixner Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201206220542.263204937@linutronix.de --- include/linux/rtc.h | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index ff62680b48ca..b829382de6c3 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -110,13 +110,36 @@ struct rtc_device { /* Some hardware can't support UIE mode */ int uie_unsupported; - /* Number of nsec it takes to set the RTC clock. This influences when - * the set ops are called. An offset: - * - of 0.5 s will call RTC set for wall clock time 10.0 s at 9.5 s - * - of 1.5 s will call RTC set for wall clock time 10.0 s at 8.5 s - * - of -0.5 s will call RTC set for wall clock time 10.0 s at 10.5 s + /* + * This offset specifies the update timing of the RTC. + * + * tsched t1 write(t2.tv_sec - 1sec)) t2 RTC increments seconds + * + * The offset defines how tsched is computed so that the write to + * the RTC (t2.tv_sec - 1sec) is correct versus the time required + * for the transport of the write and the time which the RTC needs + * to increment seconds the first time after the write (t2). + * + * For direct accessible RTCs tsched ~= t1 because the write time + * is negligible. For RTCs behind slow busses the transport time is + * significant and has to be taken into account. + * + * The time between the write (t1) and the first increment after + * the write (t2) is RTC specific. For a MC146818 RTC it's 500ms, + * for many others it's exactly 1 second. Consult the datasheet. + * + * The value of this offset is also used to calculate the to be + * written value (t2.tv_sec - 1sec) at tsched. + * + * The default value for this is NSEC_PER_SEC + 10 msec default + * transport time. The offset can be adjusted by drivers so the + * calculation for the to be written value at tsched becomes + * correct: + * + * newval = tsched + set_offset_nsec - NSEC_PER_SEC + * and (tsched + set_offset_nsec) % NSEC_PER_SEC == 0 */ - long set_offset_nsec; + unsigned long set_offset_nsec; bool registered; -- cgit v1.2.3 From fe79d5de77204dd946cfad76a9bec23354b1a500 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Thu, 10 Dec 2020 17:05:20 +0100 Subject: USB: typec: tcpm: Add a 30ms room for tPSSourceOn in PR_SWAP TCPM state machine needs 20-25ms to enter the ErrorRecovery state after tPSSourceOn timer timeouts. Change the timer from max 480ms to 450ms to ensure that the timer complies with the Spec. In order to keep the flexibility for other usecases using tPSSourceOn, add another timer only for PR_SWAP. Cc: Guenter Roeck Cc: Heikki Krogerus Cc: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Signed-off-by: Will McVicker Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201210160521.3417426-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index 63a66dd5d832..bb9a782e1411 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -466,6 +466,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_DRP_SRC 30 #define PD_T_PS_SOURCE_OFF 920 #define PD_T_PS_SOURCE_ON 480 +#define PD_T_PS_SOURCE_ON_PRS 450 /* 390 - 480ms */ #define PD_T_PS_HARD_RESET 30 #define PD_T_SRC_RECOVER 760 #define PD_T_SRC_RECOVER_MAX 1000 -- cgit v1.2.3 From fecc4559780d52d174ea05e3bf543669165389c3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 2 Dec 2020 14:07:09 +0200 Subject: fsnotify: fix events reported to watching parent and child fsnotify_parent() used to send two separate events to backends when a parent inode is watching children and the child inode is also watching. In an attempt to avoid duplicate events in fanotify, we unified the two backend callbacks to a single callback and handled the reporting of the two separate events for the relevant backends (inotify and dnotify). However the handling is buggy and can result in inotify and dnotify listeners receiving events of the type they never asked for or spurious events. The problem is the unified event callback with two inode marks (parent and child) is called when any of the parent and child inodes are watched and interested in the event, but the parent inode's mark that is interested in the event on the child is not necessarily the one we are currently reporting to (it could belong to a different group). So before reporting the parent or child event flavor to backend we need to check that the mark is really interested in that event flavor. The semantics of INODE and CHILD marks were hard to follow and made the logic more complicated than it should have been. Replace it with INODE and PARENT marks semantics to hopefully make the logic more clear. Thanks to Hugh Dickins for spotting a bug in the earlier version of this patch. Fixes: 497b0c5a7c06 ("fsnotify: send event to parent and child with single callback") CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201202120713.702387-4-amir73il@gmail.com Reported-by: Hugh Dickins Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 4ee3044eedd0..a2e42d3cd87c 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -278,7 +278,7 @@ static inline const struct path *fsnotify_data_path(const void *data, enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, - FSNOTIFY_OBJ_TYPE_CHILD, + FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, @@ -286,7 +286,7 @@ enum fsnotify_obj_type { }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) -#define FSNOTIFY_OBJ_TYPE_CHILD_FL (1U << FSNOTIFY_OBJ_TYPE_CHILD) +#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) #define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) @@ -331,7 +331,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ } FSNOTIFY_ITER_FUNCS(inode, INODE) -FSNOTIFY_ITER_FUNCS(child, CHILD) +FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) -- cgit v1.2.3 From 14486c82612a177cb910980c70ba900827ca0894 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 4 Nov 2020 15:46:41 +0200 Subject: rfkill: add a reason to the HW rfkill state The WLAN device may exist yet not be usable. This can happen when the WLAN device is controllable by both the host and some platform internal component. We need some arbritration that is vendor specific, but when the device is not available for the host, we need to reflect this state towards the user space. Add a reason field to the rfkill object (and event) so that userspace can know why the device is in rfkill: because some other platform component currently owns the device, or because the actual hw rfkill signal is asserted. Capable userspace can now determine the reason for the rfkill and possibly do some negotiation on a side band channel using a proprietary protocol to gain ownership on the device in case the device is owned by some other component. When the host gains ownership on the device, the kernel can remove the RFKILL_HARD_BLOCK_NOT_OWNER reason and the hw rfkill state will be off. Then, the userspace can bring the device up and start normal operation. The rfkill_event structure is enlarged to include the additional byte, it is now 9 bytes long. Old user space will ask to read only 8 bytes so that the kernel can know not to feed them with more data. When the user space writes 8 bytes, new kernels will just read what is present in the file descriptor. This new byte is read only from the userspace standpoint anyway. If a new user space uses an old kernel, it'll ask to read 9 bytes but will get only 8, and it'll know that it didn't get the new state. When it'll write 9 bytes, the kernel will again ignore this new byte which is read only from the userspace standpoint. Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20201104134641.28816-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- include/linux/rfkill.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 8ad2487a86d5..231e06b74b50 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -137,6 +137,17 @@ void rfkill_unregister(struct rfkill *rfkill); */ void rfkill_destroy(struct rfkill *rfkill); +/** + * rfkill_set_hw_state_reason - Set the internal rfkill hardware block state + * with a reason + * @rfkill: pointer to the rfkill class to modify. + * @blocked: the current hardware block state to set + * @reason: one of &enum rfkill_hard_block_reasons + * + * Prefer to use rfkill_set_hw_state if you don't need any special reason. + */ +bool rfkill_set_hw_state_reason(struct rfkill *rfkill, + bool blocked, unsigned long reason); /** * rfkill_set_hw_state - Set the internal rfkill hardware block state * @rfkill: pointer to the rfkill class to modify. @@ -156,7 +167,11 @@ void rfkill_destroy(struct rfkill *rfkill); * should be blocked) so that drivers need not keep track of the soft * block state -- which they might not be able to. */ -bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); +static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) +{ + return rfkill_set_hw_state_reason(rfkill, blocked, + RFKILL_HARD_BLOCK_SIGNAL); +} /** * rfkill_set_sw_state - Set the internal rfkill software block state @@ -256,6 +271,13 @@ static inline void rfkill_destroy(struct rfkill *rfkill) { } +static inline bool rfkill_set_hw_state_reason(struct rfkill *rfkill, + bool blocked, + unsigned long reason) +{ + return blocked; +} + static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) { return blocked; -- cgit v1.2.3 From d6587602c59974a2eda35e8ed70a4f5970380be8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 29 Nov 2020 17:30:46 +0200 Subject: cfg80211: Parse SAE H2E only membership selector This extends the support for drivers that rebuild IEs in the FW (same as with HT/VHT/HE). Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20201129172929.4012647275f3.I1a93ae71c57ef0b6f58f99d47fce919d19d65ff0@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5e8cc9c3d45a..cbd294239430 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1261,6 +1261,7 @@ struct ieee80211_mgmt { #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 #define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 #define BSS_MEMBERSHIP_SELECTOR_HE_PHY 122 +#define BSS_MEMBERSHIP_SELECTOR_SAE_H2E 123 /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) -- cgit v1.2.3 From 9850742470804b2cc6a6543bd8f5822eeb5fdbc0 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 29 Nov 2020 17:30:52 +0200 Subject: ieee80211: update reduced neighbor report TBTT info length A new field (20MHz PSD - 1 byte) was added to the RNR TBTT info field. Adjust the expected TBTT info length accordingly. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20201129172929.b503adccce6a.Ie684e1d3039c111bf2d521bf762aaec3f7a24d2e@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cbd294239430..72ff75fb1971 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3836,15 +3836,15 @@ static inline bool for_each_element_completed(const struct element *element, #define WLAN_RSNX_CAPA_SAE_H2E BIT(5) /* - * reduced neighbor report, based on Draft P802.11ax_D5.0, - * section 9.4.2.170 + * reduced neighbor report, based on Draft P802.11ax_D6.1, + * section 9.4.2.170 and accepted contributions. */ #define IEEE80211_AP_INFO_TBTT_HDR_TYPE 0x03 #define IEEE80211_AP_INFO_TBTT_HDR_FILTERED 0x04 #define IEEE80211_AP_INFO_TBTT_HDR_COLOC 0x08 #define IEEE80211_AP_INFO_TBTT_HDR_COUNT 0xF0 -#define IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM 8 -#define IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM 12 +#define IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM 9 +#define IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM 13 #define IEEE80211_RNR_TBTT_PARAMS_OCT_RECOMMENDED 0x01 #define IEEE80211_RNR_TBTT_PARAMS_SAME_SSID 0x02 -- cgit v1.2.3 From 84e0d87c9944eb36ae6037af5cb6905f67c074c5 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 10 Dec 2020 14:30:12 +0000 Subject: thermal: devfreq_cooling: add new registration functions with Energy Model The Energy Model (EM) framework supports devices such as Devfreq. Create new registration function which automatically register EM for the thermal devfreq_cooling devices. This patch prepares the code for coming changes which are going to replace old power model with the new EM. Reviewed-by: Ionela Voinescu Signed-off-by: Lukasz Luba Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20201210143014.24685-4-lukasz.luba@arm.com --- include/linux/devfreq_cooling.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 9df2dfca68dd..7a9fbcc7b265 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -65,6 +65,9 @@ struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df); void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); +struct thermal_cooling_device * +devfreq_cooling_em_register(struct devfreq *df, + struct devfreq_cooling_power *dfc_power); #else /* !CONFIG_DEVFREQ_THERMAL */ @@ -87,6 +90,13 @@ devfreq_cooling_register(struct devfreq *df) return ERR_PTR(-EINVAL); } +static inline struct thermal_cooling_device * +devfreq_cooling_em_register(struct devfreq *df, + struct devfreq_cooling_power *dfc_power) +{ + return ERR_PTR(-EINVAL); +} + static inline void devfreq_cooling_unregister(struct thermal_cooling_device *dfc) { -- cgit v1.2.3 From 615510fe13bd2434610193f1acab53027d5146d6 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 10 Dec 2020 14:30:13 +0000 Subject: thermal: devfreq_cooling: remove old power model and use EM Remove old power model and use new Energy Model to calculate the power budget. It drops static + dynamic power calculations and power table in order to use Energy Model performance domain data. This model should be easy to use and could find more users. It is also less complicated to setup the needed structures. Reviewed-by: Ionela Voinescu Signed-off-by: Lukasz Luba Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20201210143014.24685-5-lukasz.luba@arm.com --- include/linux/devfreq_cooling.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 7a9fbcc7b265..14baa73fc2de 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -16,17 +16,6 @@ /** * struct devfreq_cooling_power - Devfreq cooling power ops - * @get_static_power: Take voltage, in mV, and return the static power - * in mW. If NULL, the static power is assumed - * to be 0. - * @get_dynamic_power: Take voltage, in mV, and frequency, in HZ, and - * return the dynamic power draw in mW. If NULL, - * a simple power model is used. - * @dyn_power_coeff: Coefficient for the simple dynamic power model in - * mW/(MHz mV mV). - * If get_dynamic_power() is NULL, then the - * dynamic power is calculated as - * @dyn_power_coeff * frequency * voltage^2 * @get_real_power: When this is set, the framework uses it to ask the * device driver for the actual power. * Some devices have more sophisticated methods @@ -46,14 +35,8 @@ * max total (static + dynamic) power value for each OPP. */ struct devfreq_cooling_power { - unsigned long (*get_static_power)(struct devfreq *devfreq, - unsigned long voltage); - unsigned long (*get_dynamic_power)(struct devfreq *devfreq, - unsigned long freq, - unsigned long voltage); int (*get_real_power)(struct devfreq *df, u32 *power, unsigned long freq, unsigned long voltage); - unsigned long dyn_power_coeff; }; #ifdef CONFIG_DEVFREQ_THERMAL -- cgit v1.2.3 From d7203eedf4f68e9909fd489453168a9d26bf0c3d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 10 Dec 2020 13:15:11 +0100 Subject: thermal/core: Add critical and hot ops Currently there is no way to the sensors to directly call an ops in interrupt mode without calling thermal_zone_device_update assuming all the trip points are defined. A sensor may want to do something special if a trip point is hot or critical. This patch adds the critical and hot ops to the thermal zone device, so a sensor can directly invoke them or let the thermal framework to call the sensor specific ones. Tested-by: Kai-Heng Feng Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/20201210121514.25760-2-daniel.lezcano@linaro.org --- include/linux/thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d07ea27e72a9..31b84404f047 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -79,6 +79,8 @@ struct thermal_zone_device_ops { enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, enum thermal_trip_type); + void (*hot)(struct thermal_zone_device *); + void (*critical)(struct thermal_zone_device *); }; struct thermal_cooling_device_ops { @@ -399,6 +401,7 @@ void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); +void thermal_zone_device_critical(struct thermal_zone_device *tz); #else static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, -- cgit v1.2.3 From 6a6939d5f588b40db32b82ebcec20ee5189c8376 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 10 Dec 2020 18:27:48 -0300 Subject: regulator: pfuze100: Convert the driver to DT-only Since 5.10-rc1 i.MX is a devicetree-only platform, so simplify the code by removing the unused non-DT support. Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20201210212748.5849-1-festevam@gmail.com Signed-off-by: Mark Brown --- include/linux/regulator/pfuze100.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h index d47e668d9ca8..c964fe8ab698 100644 --- a/include/linux/regulator/pfuze100.h +++ b/include/linux/regulator/pfuze100.h @@ -63,10 +63,4 @@ #define PFUZE3001_VLDO3 8 #define PFUZE3001_VLDO4 9 -struct regulator_init_data; - -struct pfuze_regulator_platform_data { - struct regulator_init_data *init_data[PFUZE100_MAX_REGULATOR]; -}; - #endif /* __LINUX_REG_PFUZE100_H */ -- cgit v1.2.3 From b388fa50142510fb6477f130bb1b3f05a0a263a1 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 9 Nov 2020 09:41:21 +0000 Subject: Revert "genirq: Add fasteoi IPI flow" handle_percpu_devid_fasteoi_ipi() has no more users, and handle_percpu_devid_irq() can do all that it was supposed to do. Get rid of it. This reverts commit c5e5ec033c4ab25c53f1fd217849e75deb0bf7bf. Signed-off-by: Valentin Schneider Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201109094121.29975-6-valentin.schneider@arm.com --- include/linux/irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c54365309e97..ca26bec51cec 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -647,7 +647,6 @@ static inline int irq_set_parent(int irq, int parent_irq) */ extern void handle_level_irq(struct irq_desc *desc); extern void handle_fasteoi_irq(struct irq_desc *desc); -extern void handle_percpu_devid_fasteoi_ipi(struct irq_desc *desc); extern void handle_edge_irq(struct irq_desc *desc); extern void handle_edge_eoi_irq(struct irq_desc *desc); extern void handle_simple_irq(struct irq_desc *desc); -- cgit v1.2.3 From 1d3aec89286254487df7641c30f1b14ad1d127a5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:53 +0800 Subject: genirq/affinity: Add irq_update_affinity_desc() Add a function to allow the affinity of an interrupt be switched to managed, such that interrupts allocated for platform devices may be managed. This new interface has certain limitations, and attempts to use it in the following circumstances will fail: - For when the kernel is configured for generic IRQ reservation mode (in config GENERIC_IRQ_RESERVATION_MODE). The reason being that it could conflict with managed vs. non-managed interrupt accounting. - The interrupt is already started, which should not be the case during init - The interrupt is already configured as managed, which means double init Suggested-by: Thomas Gleixner Signed-off-by: John Garry Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1606905417-183214-2-git-send-email-john.garry@huawei.com --- include/linux/interrupt.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ee8299eb1f52..870b3251e174 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -352,6 +352,8 @@ extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); +extern int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); @@ -387,6 +389,12 @@ static inline int irq_set_affinity_hint(unsigned int irq, return -EINVAL; } +static inline int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity) +{ + return -EINVAL; +} + static inline int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { -- cgit v1.2.3 From 9806731db684a475ade1e95d166089b9edbd9da3 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:54 +0800 Subject: resource: Add irqresource_disabled() Add a common function to set the fields for a irq resource to disabled, which mimics what is done in acpi_dev_irqresource_disabled(), with a view to replace that function. Signed-off-by: John Garry Signed-off-by: Marc Zyngier Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/1606905417-183214-3-git-send-email-john.garry@huawei.com --- include/linux/ioport.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5135d4b86cd6..f9bf374f9633 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -307,6 +307,13 @@ struct resource *devm_request_free_mem_region(struct device *dev, struct resource *request_free_mem_region(struct resource *base, unsigned long size, const char *name); +static inline void irqresource_disabled(struct resource *res, u32 irq) +{ + res->start = irq; + res->end = irq; + res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED | IORESOURCE_UNSET; +} + #ifdef CONFIG_IO_STRICT_DEVMEM void revoke_devmem(struct resource *res); #else -- cgit v1.2.3 From e15f2fa959f2cce8a05e8e3a596e75d068cd42c5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:56 +0800 Subject: driver core: platform: Add devm_platform_get_irqs_affinity() Drivers for multi-queue platform devices may also want managed interrupts for handling HW queue completion interrupts, so add support. The function accepts an affinity descriptor pointer, which covers all IRQs expected for the device. The function is devm class as the only current in-tree user will also use devm method for requesting the interrupts; as such, the function is made as devm as it can ensure ordering of freeing the irq and disposing of the mapping. Signed-off-by: John Garry Signed-off-by: Marc Zyngier Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/1606905417-183214-5-git-send-email-john.garry@huawei.com --- include/linux/platform_device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 77a2aada106d..4d75633e6735 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -15,6 +15,7 @@ #define PLATFORM_DEVID_NONE (-1) #define PLATFORM_DEVID_AUTO (-2) +struct irq_affinity; struct mfd_cell; struct property_entry; struct platform_device_id; @@ -70,6 +71,11 @@ devm_platform_ioremap_resource_byname(struct platform_device *pdev, extern int platform_get_irq(struct platform_device *, unsigned int); extern int platform_get_irq_optional(struct platform_device *, unsigned int); extern int platform_irq_count(struct platform_device *); +extern int devm_platform_get_irqs_affinity(struct platform_device *dev, + struct irq_affinity *affd, + unsigned int minvec, + unsigned int maxvec, + int **irqs); extern struct resource *platform_get_resource_byname(struct platform_device *, unsigned int, const char *); -- cgit v1.2.3 From 426506a7e0f1902268c3edbdc7e5475624a9d18b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:24 +0200 Subject: dmaengine: ti: k3-udma-glue: Add function to get device pointer for DMA API Glue layer users should use the device of the DMA for DMA mapping and allocations as it is the DMA which accesses to descriptors and buffers, not the clients Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Link: https://lore.kernel.org/r/20201208090440.31792-5-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-udma-glue.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 5eb34ad973a7..d7c12f31377c 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -41,6 +41,8 @@ void k3_udma_glue_reset_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, u32 k3_udma_glue_tx_get_hdesc_size(struct k3_udma_glue_tx_channel *tx_chn); u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn); int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn); +struct device * + k3_udma_glue_tx_get_dma_device(struct k3_udma_glue_tx_channel *tx_chn); enum { K3_UDMA_GLUE_SRC_TAG_LO_KEEP = 0, @@ -130,5 +132,7 @@ int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); +struct device * + k3_udma_glue_rx_get_dma_device(struct k3_udma_glue_rx_channel *rx_chn); #endif /* K3_UDMA_GLUE_H_ */ -- cgit v1.2.3 From 4f910c035f38053ac8eb63a672c78862c535cd0f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:27 +0200 Subject: dmaengine: of-dma: Add support for optional router configuration callback Additional configuration for the DMA event router might be needed for a channel which can not be done during device_alloc_chan_resources callback since the router information is not yet present for the drivers. If there is a need for additional configuration for the channel if DMA router is in use, then the driver can implement the device_router_config callback. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-8-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 493a047ed0a2..aed44888cad3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -805,6 +805,7 @@ struct dma_filter { * by tx_status * @device_alloc_chan_resources: allocate resources and return the * number of allocated descriptors + * @device_router_config: optional callback for DMA router configuration * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation @@ -879,6 +880,7 @@ struct dma_device { enum dma_residue_granularity residue_granularity; int (*device_alloc_chan_resources)(struct dma_chan *chan); + int (*device_router_config)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan); struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( -- cgit v1.2.3 From ab650ef6d548153862119e1bf3bf267510707f48 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:28 +0200 Subject: dmaengine: Add support for per channel coherency handling If the DMA device supports per channel coherency configuration (a channel can be configured to have coherent or not coherent view) then a single device (the DMA controller's device) can not be used for dma_api for all channels as channels can have different coherency. Introduce custom_dma_mapping flag for the dma_chan and a new helper to get the device pointer to be used for dma_api for the given channel. Client drivers should be updated to be able to support per channel coherency by: - dma_map_single(chan->device->dev, ptr, size, DMA_TO_DEVICE); + struct device *dma_dev = dmaengine_get_dma_device(chan); + + dma_map_single(dma_dev, ptr, size, DMA_TO_DEVICE); Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-9-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index aed44888cad3..68130f5f599e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -357,11 +357,14 @@ struct dma_chan { * @chan: driver channel device * @device: sysfs device * @dev_id: parent dma_device dev_id + * @chan_dma_dev: The channel is using custom/different dma-mapping + * compared to the parent dma_device */ struct dma_chan_dev { struct dma_chan *chan; struct device device; int dev_id; + bool chan_dma_dev; }; /** @@ -1618,4 +1621,13 @@ dmaengine_get_direction_text(enum dma_transfer_direction dir) return "invalid"; } } + +static inline struct device *dmaengine_get_dma_device(struct dma_chan *chan) +{ + if (chan->dev->chan_dma_dev) + return &chan->dev->device; + + return chan->device->dev; +} + #endif /* DMAENGINE_H */ -- cgit v1.2.3 From b9366e2577a38ca5322f326cff9752c2008597c6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:33 +0200 Subject: dmaengine: ti: k3-psil: Extend psil_endpoint_config for K3 PKTDMA Additional fields needed for K3 PKTDMA to be able to handle the mapped channels (channels are locked to handle specific threads) and flow ranges for these mapped threads. PKTDMA also introduces tflow for tx channels which can not be found in K3 UDMA architecture. Signed-off-by: Peter Ujfalusi Reviewed-by: Grygorii Strashko Link: https://lore.kernel.org/r/20201208090440.31792-14-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-psil.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/k3-psil.h b/include/linux/dma/k3-psil.h index 1962f75fa2d3..36e22c5a0f29 100644 --- a/include/linux/dma/k3-psil.h +++ b/include/linux/dma/k3-psil.h @@ -50,6 +50,15 @@ enum psil_endpoint_type { * @channel_tpl: Desired throughput level for the channel * @pdma_acc32: ACC32 must be enabled on the PDMA side * @pdma_burst: BURST must be enabled on the PDMA side + * @mapped_channel_id: PKTDMA thread to channel mapping for mapped channels. + * The thread must be serviced by the specified channel if + * mapped_channel_id is >= 0 in case of PKTDMA + * @flow_start: PKDMA flow range start of mapped channel. Unmapped + * channels use flow_id == chan_id + * @flow_num: PKDMA flow count of mapped channel. Unmapped channels + * use flow_id == chan_id + * @default_flow_id: PKDMA default (r)flow index of mapped channel. + * Must be within the flow range of the mapped channel. */ struct psil_endpoint_config { enum psil_endpoint_type ep_type; @@ -63,6 +72,13 @@ struct psil_endpoint_config { /* PDMA properties, valid for PSIL_EP_PDMA_* */ unsigned pdma_acc32:1; unsigned pdma_burst:1; + + /* PKDMA mapped channel */ + int mapped_channel_id; + /* PKTDMA tflow and rflow ranges for mapped channel */ + u16 flow_start; + u16 flow_num; + u16 default_flow_id; }; int psil_set_new_ep_config(struct device *dev, const char *name, -- cgit v1.2.3 From fc373e47d72605cc3f5012ddda49d2dca430d51f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Dec 2020 11:04:35 +0200 Subject: dmaengine: ti: Add support for k3 event routers In k3 architecture a DMA channel (in TR momde) can be triggered by global events, origination from different modules. The events for triggers can be sent from any module which is connected to PSI-L fabric, but the event number to be sent is DMA channel specific, it is only known after the channel itself is requested. The router operation needs to be split up: - route_allocate: configure the dma_spec for the DMA and store the configuration which is needed for the router's input - set_event: callback used by the DMA driver to set the event number for the channel and enable the routing Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-16-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-event-router.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/dma/k3-event-router.h (limited to 'include/linux') diff --git a/include/linux/dma/k3-event-router.h b/include/linux/dma/k3-event-router.h new file mode 100644 index 000000000000..e3f88b2f87be --- /dev/null +++ b/include/linux/dma/k3-event-router.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com + */ + +#ifndef K3_EVENT_ROUTER_ +#define K3_EVENT_ROUTER_ + +#include + +struct k3_event_route_data { + void *priv; + int (*set_event)(void *priv, u32 event); +}; + +#endif /* K3_EVENT_ROUTER_ */ -- cgit v1.2.3 From d782298c6f6b854452965b56d91616dfb60490c5 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 8 Dec 2020 11:04:36 +0200 Subject: soc: ti: k3-ringacc: add AM64 DMA rings support. The DMAs in AM64 have built in rings compared to AM654/J721e/J7200 where a separate and generic ringacc is used. The ring SW interface is similar to ringacc with some major architectural differences, like They are part of the DMA (BCDMA or PKTDMA). They are dual mode rings are modeled as pair of Rings objects which has common configuration and memory buffer, but separate real-time control register sets for each direction mem2dev (forward) and dev2mem (reverse). The ringacc driver must be initialized for DMA rings use with k3_ringacc_dmarings_init() as it is not an independent device as ringacc is. AM64 rings must be requested only using k3_ringacc_request_rings_pair(), and forward ring must always be initialized/configured. After this any other Ringacc APIs can be used without any callers changes. Signed-off-by: Grygorii Strashko Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-17-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/soc/ti/k3-ringacc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 658dc71d2901..39b022b92598 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -70,6 +70,7 @@ struct k3_ring; * @dma_dev: Master device which is using and accessing to the ring * memory when the mode is K3_RINGACC_RING_MODE_RING. Memory allocations * should be done using this device. + * @asel: Address Space Select value for physical addresses */ struct k3_ring_cfg { u32 size; @@ -79,6 +80,7 @@ struct k3_ring_cfg { u32 flags; struct device *dma_dev; + u32 asel; }; #define K3_RINGACC_RING_ID_ANY (-1) @@ -250,4 +252,19 @@ int k3_ringacc_ring_pop_tail(struct k3_ring *ring, void *elem); u32 k3_ringacc_get_tisci_dev_id(struct k3_ring *ring); +/* DMA ring support */ +struct ti_sci_handle; + +/** + * struct struct k3_ringacc_init_data - Initialization data for DMA rings + */ +struct k3_ringacc_init_data { + const struct ti_sci_handle *tisci; + u32 tisci_dev_id; + u32 num_rings; +}; + +struct k3_ringacc *k3_ringacc_dmarings_init(struct platform_device *pdev, + struct k3_ringacc_init_data *data); + #endif /* __SOC_TI_K3_RINGACC_API_H_ */ -- cgit v1.2.3 From 5b65781d06ea90ef2f8e51a13352c43c3daa8cdc Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Tue, 8 Dec 2020 11:04:40 +0200 Subject: dmaengine: ti: k3-udma-glue: Add support for K3 PKTDMA This commit adds support for PKTDMA in k3-udma glue driver. Use new psil_endpoint_config struct to get static data for a given channel or a flow during setup. Make sure that the RX flows being mapped to a RX channel is within the range of flows that is been allocated to that RX channel. Signed-off-by: Vignesh Raghavendra Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201208090440.31792-21-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dma/k3-udma-glue.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index d7c12f31377c..e443be4d3b4b 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -43,6 +43,10 @@ u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn); int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn); struct device * k3_udma_glue_tx_get_dma_device(struct k3_udma_glue_tx_channel *tx_chn); +void k3_udma_glue_tx_dma_to_cppi5_addr(struct k3_udma_glue_tx_channel *tx_chn, + dma_addr_t *addr); +void k3_udma_glue_tx_cppi5_to_dma_addr(struct k3_udma_glue_tx_channel *tx_chn, + dma_addr_t *addr); enum { K3_UDMA_GLUE_SRC_TAG_LO_KEEP = 0, @@ -134,5 +138,9 @@ int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); struct device * k3_udma_glue_rx_get_dma_device(struct k3_udma_glue_rx_channel *rx_chn); +void k3_udma_glue_rx_dma_to_cppi5_addr(struct k3_udma_glue_rx_channel *rx_chn, + dma_addr_t *addr); +void k3_udma_glue_rx_cppi5_to_dma_addr(struct k3_udma_glue_rx_channel *rx_chn, + dma_addr_t *addr); #endif /* K3_UDMA_GLUE_H_ */ -- cgit v1.2.3 From e998879d4fb7991856916972168cf27c0d86ed12 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Thu, 10 Dec 2020 01:25:15 +0000 Subject: x86,swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests For SEV, all DMA to and from guest has to use shared (un-encrypted) pages. SEV uses SWIOTLB to make this happen without requiring changes to device drivers. However, depending on the workload being run, the default 64MB of it might not be enough and it may run out of buffers to use for DMA, resulting in I/O errors and/or performance degradation for high I/O workloads. Adjust the default size of SWIOTLB for SEV guests using a percentage of the total memory available to guest for the SWIOTLB buffers. Adds a new sev_setup_arch() function which is invoked from setup_arch() and it calls into a new swiotlb generic code function swiotlb_adjust_size() to do the SWIOTLB buffer adjustment. v5 fixed build errors and warnings as Reported-by: kbuild test robot Signed-off-by: Ashish Kalra Co-developed-by: Borislav Petkov Signed-off-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index fbdc65782195..d9c9fc9ca5d2 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -30,6 +30,9 @@ enum swiotlb_force { */ #define IO_TLB_SHIFT 11 +/* default to 64MB */ +#define IO_TLB_DEFAULT_SIZE (64UL<<20) + extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); @@ -78,6 +81,7 @@ void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(void); +void __init swiotlb_adjust_size(unsigned long new_size); #else #define swiotlb_force SWIOTLB_NO_FORCE static inline bool is_swiotlb_buffer(phys_addr_t paddr) @@ -100,6 +104,10 @@ static inline bool is_swiotlb_active(void) { return false; } + +static inline void swiotlb_adjust_size(unsigned long new_size) +{ +} #endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); -- cgit v1.2.3 From 14dc3983b5dff513a90bd5a8cc90acaf7867c3d0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:38 -0800 Subject: kbuild: avoid static_assert for genksyms genksyms does not know or care about the _Static_assert() built-in, and sometimes falls back to ignoring the later symbols, which causes undefined behavior such as WARNING: modpost: EXPORT symbol "ethtool_set_ethtool_phy_ops" [vmlinux] version generation failed, symbol will not be versioned. ld: net/ethtool/common.o: relocation R_AARCH64_ABS32 against `__crc_ethtool_set_ethtool_phy_ops' can not be used when making a shared object net/ethtool/common.o:(_ftrace_annotated_branch+0x0): dangerous relocation: unsupported relocation Redefine static_assert for genksyms to avoid that. Link: https://lkml.kernel.org/r/20201203230955.1482058-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Suggested-by: Ard Biesheuvel Cc: Masahiro Yamada Cc: Michal Marek Cc: Kees Cook Cc: Rikard Falkeborn Cc: Marco Elver Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index e3a0be2c90ad..7bb66e15b481 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,4 +77,9 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) +#ifdef __GENKSYMS__ +/* genksyms gets confused by _Static_assert */ +#define _Static_assert(expr, ...) +#endif + #endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.2.3 From 6e7b64b9dd6d96537d816ea07ec26b7dedd397b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 11 Dec 2020 13:36:46 -0800 Subject: elfcore: fix building with clang kernel/elfcore.c only contains weak symbols, which triggers a bug with clang in combination with recordmcount: Cannot find symbol for section 2: .text. kernel/elfcore.o: failed Move the empty stubs into linux/elfcore.h as inline functions. As only two architectures use these, just use the architecture specific Kconfig symbols to key off the declaration. Link: https://lkml.kernel.org/r/20201204165742.3815221-2-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Barret Rhoden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elfcore.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 46c3d691f677..de51c1bef27d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -104,6 +104,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } +#if defined(CONFIG_UM) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -118,5 +119,26 @@ elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); extern size_t elf_core_extra_data_size(void); +#else +static inline Elf_Half elf_core_extra_phdrs(void) +{ + return 0; +} + +static inline int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + return 1; +} + +static inline int elf_core_write_extra_data(struct coredump_params *cprm) +{ + return 1; +} + +static inline size_t elf_core_extra_data_size(void) +{ + return 0; +} +#endif #endif /* _LINUX_ELFCORE_H */ -- cgit v1.2.3 From 2ab695aa8eb8f3226f68a2b91fc6103b56fcb57d Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 11 Dec 2020 12:26:29 -0800 Subject: ACPI: Use fwnode_init() to set up fwnode Commit 01bb86b380a3 ("driver core: Add fwnode_init()") was supposed to fix up all instances of fwnode creation to use fwnode_init(). But looks like this instance was missed. This causes a NULL pointer dereference during device_add() [1]. So, fix it. [ 60.792324][ T1] Call trace: [ 60.795495][ T1] device_add+0xf60/0x16b0 __fw_devlink_link_to_consumers at drivers/base/core.c:1583 (inlined by) fw_devlink_link_device at drivers/base/core.c:1726 (inlined by) device_add at drivers/base/core.c:3088 [ 60.799813][ T1] platform_device_add+0x274/0x628 [ 60.804833][ T1] acpi_iort_init+0x9d8/0xc50 [ 60.809415][ T1] acpi_init+0x45c/0x4e8 [ 60.813556][ T1] do_one_initcall+0x170/0xb70 [ 60.818224][ T1] kernel_init_freeable+0x6a8/0x734 [ 60.823332][ T1] kernel_init+0x18/0x12c [ 60.827566][ T1] ret_from_fork+0x10/0x1c [ 60.838756][ T1] ---[ end trace fa5c8ce17a226d83 ]--- [1] - https://lore.kernel.org/linux-arm-kernel/02e7047071f0b54b046ac472adeeb3fafabc643c.camel@redhat.com/ Fixes: 01bb86b380a3 ("driver core: Add fwnode_init()") Reported-by: Qian Cai Suggested-by: Robin Murphy Tested-by: Marc Zyngier Acked-by: Rafael J. Wysocki Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20201211202629.2164655-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 39263c6b52e1..2630c2e953f7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -55,7 +55,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) if (!fwnode) return NULL; - fwnode->ops = &acpi_static_fwnode_ops; + fwnode_init(fwnode, &acpi_static_fwnode_ops); return fwnode; } -- cgit v1.2.3 From 98b89b649fce39dacb9dc036d6d0fdb8caff73f7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:03:01 -0600 Subject: signal: kill JOBCTL_TASK_WORK It's no longer used, get rid of it. Signed-off-by: Jens Axboe --- include/linux/sched/jobctl.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index d2b4204ba4d3..fa067de9f1a9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,7 +19,6 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ -#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -29,10 +28,9 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) -#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); -- cgit v1.2.3 From e296dc4996b8094ccde45d19090d804c4103513e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Oct 2020 16:04:39 -0600 Subject: kernel: remove checking for TIF_NOTIFY_SIGNAL It's available everywhere now, no need to check or add dummy defines. Signed-off-by: Jens Axboe --- include/linux/entry-common.h | 4 ---- include/linux/sched/signal.h | 2 -- include/linux/tracehook.h | 4 ---- 3 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b9711e813ec2..abec3a5ae799 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -37,10 +37,6 @@ # define _TIF_UPROBE (0) #endif -#ifndef _TIF_NOTIFY_SIGNAL -# define _TIF_NOTIFY_SIGNAL (0) -#endif - /* * TIF flags handled in syscall_enter_from_user_mode() */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index bd5afa076189..24b7b862e043 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -360,7 +360,6 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { -#if defined(TIF_NOTIFY_SIGNAL) /* * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same * behavior in terms of ensuring that we break out of wait loops @@ -368,7 +367,6 @@ static inline int signal_pending(struct task_struct *p) */ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) return 1; -#endif return task_sigpending(p); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index f7d82e4fafd6..ee9ab7dbc8c3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -205,12 +205,10 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) */ static inline void tracehook_notify_signal(void) { -#if defined(TIF_NOTIFY_SIGNAL) clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); if (current->task_works) task_work_run(); -#endif } /* @@ -218,11 +216,9 @@ static inline void tracehook_notify_signal(void) */ static inline void set_notify_signal(struct task_struct *task) { -#if defined(TIF_NOTIFY_SIGNAL) if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && !wake_up_state(task, TASK_INTERRUPTIBLE)) kick_process(task); -#endif } #endif /* */ -- cgit v1.2.3 From 3cabca87b329cbcbdf295be0094adbd72c7b1f67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 12 Dec 2020 18:29:20 +0100 Subject: ntp: Fix prototype in the !CONFIG_GENERIC_CMOS_UPDATE case In the !CONFIG_GENERIC_CMOS_UPDATE case the update_persistent_clock64() function gets defined as a stub in ntp.c - make the prototype in conditional on CONFIG_GENERIC_CMOS_UPDATE as well. Fixes: 76e87d96b30b5 ("ntp: Consolidate the RTC update implementation") Signed-off-by: Ingo Molnar Acked-by: Thomas Gleixner --- include/linux/timekeeping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7f7e4a3f4394..929d3f3937c0 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -303,6 +303,8 @@ extern int persistent_clock_is_local; extern void read_persistent_clock64(struct timespec64 *ts); void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, struct timespec64 *boot_offset); +#ifdef CONFIG_GENERIC_CMOS_UPDATE extern int update_persistent_clock64(struct timespec64 now); +#endif #endif -- cgit v1.2.3 From 9a20f6f4e6ba9713605fbf7e7426ca22f1181545 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Dec 2020 15:16:46 -0500 Subject: SUNRPC: Fixes for xdr_align_data() The main use case right now for xdr_align_data() is to shift the page data to the left, and in practice shrink the total XDR data buffer. This patch ensures that we fix up the accounting for the buffer length as we shift that data around. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9548d075e06d..2b4e44bb0654 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -252,7 +252,7 @@ extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); -extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); +extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); /** -- cgit v1.2.3 From c4f2f591f02c392ea7de018d2733748bf4c7b5f5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Dec 2020 17:15:09 -0500 Subject: SUNRPC: Fix xdr_expand_hole() We do want to try to grow the buffer if possible, but if that attempt fails, we still want to move the data and truncate the XDR message. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2b4e44bb0654..178f499e2283 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -253,7 +253,7 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); -extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); +extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length); /** * xdr_stream_remaining - Return the number of bytes remaining in the stream -- cgit v1.2.3 From f8d0e60f1056687826abc1eded98f0ea067dfc4c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 22:56:18 -0500 Subject: SUNRPC: Cleanup - constify a number of xdr_buf helpers There are a number of xdr helpers for struct xdr_buf that do not change the structure itself. Mark those as taking const pointers for documentation purposes. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 178f499e2283..68d49fdc4ee9 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -128,8 +128,8 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); -void xdr_terminate_string(struct xdr_buf *, const u32); -size_t xdr_buf_pagecount(struct xdr_buf *buf); +void xdr_terminate_string(const struct xdr_buf *, const u32); +size_t xdr_buf_pagecount(const struct xdr_buf *buf); int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); @@ -182,14 +182,14 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) * XDR buffer helper functions */ extern void xdr_shift_buf(struct xdr_buf *, size_t); -extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *); +extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); +extern int read_bytes_from_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); +extern int write_bytes_to_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); -extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); -extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); +extern int xdr_encode_word(const struct xdr_buf *, unsigned int, u32); +extern int xdr_decode_word(const struct xdr_buf *, unsigned int, u32 *); struct xdr_array2_desc; typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); @@ -200,9 +200,9 @@ struct xdr_array2_desc { xdr_xcode_elem_t xcode; }; -extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); -extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len); @@ -251,7 +251,7 @@ extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buf extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); -extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern unsigned int xdr_align_data(struct xdr_stream *, unsigned int offset, unsigned int length); extern unsigned int xdr_expand_hole(struct xdr_stream *, unsigned int offset, unsigned int length); -- cgit v1.2.3 From 7c03e2cda4a584cadc398e8f6641ca9988a39d52 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 14 Dec 2020 15:26:13 +0100 Subject: vfs: move cap_convert_nscap() call into vfs_setxattr() cap_convert_nscap() does permission checking as well as conversion of the xattr value conditionally based on fs's user-ns. This is needed by overlayfs and probably other layered fs (ecryptfs) and is what vfs_foo() is supposed to do anyway. Signed-off-by: Miklos Szeredi Acked-by: James Morris --- include/linux/capability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 1e7fe311cabe..b2f698915c0f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -270,6 +270,6 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); -extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size); +extern int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size); #endif /* !_LINUX_CAPABILITY_H */ -- cgit v1.2.3 From e0a6aa30504cb8179d07609fb6386705e8f00663 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 13 Dec 2020 09:39:40 +0100 Subject: efi: ia64: disable the capsule loader EFI capsule loading is a feature that was introduced into EFI long after its initial introduction on Itanium, and it is highly unlikely that IA64 systems are receiving firmware updates in the first place, let alone using EFI capsules. So let's disable capsule support altogether on IA64. This fixes a build error on IA64 due to a recent change that added an unconditional include of asm/efi.h, which IA64 does not provide. While at it, tweak the make rules a bit so that the EFI capsule component that is always builtin (even if the EFI capsule loader itself is built as a module) is omitted for all architectures if the module is not enabled in the build. Cc: Tony Luck Link: https://lore.kernel.org/linux-efi/20201214152200.38353-1-ardb@kernel.org Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1cd5d91d8ca1..763b816ba19c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -817,12 +817,6 @@ static inline bool efi_enabled(int feature) static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} -static inline bool -efi_capsule_pending(int *reset_type) -{ - return false; -} - static inline bool efi_soft_reserve_enabled(void) { return false; @@ -1038,6 +1032,7 @@ bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, size_t len); +#if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER) extern bool efi_capsule_pending(int *reset_type); extern int efi_capsule_supported(efi_guid_t guid, u32 flags, @@ -1045,6 +1040,9 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags, extern int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages); +#else +static inline bool efi_capsule_pending(int *reset_type) { return false; } +#endif #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -- cgit v1.2.3 From 50c9132ddfb2024e96900407beeec660cf9848bd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Sep 2020 07:55:39 -0400 Subject: ceph: add new RECOVER mount_state when recovering session When recovering a session (a'la recover_session=clean), we want to do all of the operations that we do on a forced umount, but changing the mount state to SHUTDOWN is can cause queued MDS requests to fail when the session comes back. Most of those can idle until the session is recovered in this situation. Reserve SHUTDOWN state for forced umount, and make a new RECOVER state for the forced reconnect situation. Change several tests for equality with SHUTDOWN to test for that or RECOVER. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c8645f0b797d..eb5a7ca13f9c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -104,6 +104,7 @@ enum { CEPH_MOUNT_UNMOUNTING, CEPH_MOUNT_UNMOUNTED, CEPH_MOUNT_SHUTDOWN, + CEPH_MOUNT_RECOVER, }; static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) -- cgit v1.2.3 From 36c9478d6069994848c8897755b4380aa0a29dd3 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Tue, 10 Nov 2020 21:20:08 +0800 Subject: libceph: remove unused port macros 1. monitor's default port is defined by CEPH_MON_PORT 2. CEPH_PORT_START and CEPH_PORT_LAST are not needed. Signed-off-by: Changcheng Liu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/msgr.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 9e50aede46c8..46939485f2c3 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -8,15 +8,6 @@ #define CEPH_MON_PORT 6789 /* default monitor port */ -/* - * client-side processes will try to bind to ports in this - * range, simply for the benefit of tools like nmap or wireshark - * that would like to identify the protocol. - */ -#define CEPH_PORT_FIRST 6789 -#define CEPH_PORT_START 6800 /* non-monitors start here */ -#define CEPH_PORT_LAST 6900 - /* * tcp connection banner. include a protocol version. and adjust * whenever the wire protocol changes. try to keep this string length -- cgit v1.2.3 From 968cd14edc3acff251f98bdc1eb15f13f05dd5fb Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 9 Dec 2020 10:52:20 +0800 Subject: ceph: set osdmap epoch for setxattr When setting the file/dir layout, it may need data pool info. So in mds server, it needs to check the osdmap. At present, if mds doesn't find the data pool specified, it will try to get the latest osdmap. Now if pass the osd epoch for setxattr, the mds server can only check this epoch of osdmap. URL: https://tracker.ceph.com/issues/48504 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 455e9b9e2adf..c0f1b921ec69 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -424,6 +424,7 @@ union ceph_mds_request_args { } __attribute__ ((packed)) open; struct { __le32 flags; + __le32 osdmap_epoch; /* used for setting file/dir layouts */ } __attribute__ ((packed)) setxattr; struct { struct ceph_file_layout_legacy layout; -- cgit v1.2.3 From 4f1ddb1ea874c7703528a8c21b77b7f2462ee247 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 9 Dec 2020 10:12:59 -0500 Subject: ceph: implement updated ceph_mds_request_head structure When we added the btime feature in mainline ceph, we had to extend struct ceph_mds_request_args so that it could be set. Implement the same in the kernel client. Rename ceph_mds_request_head with a _old extension, and a union ceph_mds_request_args_ext to allow for the extended size of the new header format. Add the appropriate code to handle both formats in struct create_request_message and key the behavior on whether the peer supports CEPH_FEATURE_FS_BTIME. The gid_list field in the payload is now populated from the saved credential. For now, we don't add any support for setting the btime via setattr, but this does enable us to add that in the future. [ idryomov: break unnecessarily long lines ] Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index c0f1b921ec69..d44d98033d58 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -446,11 +446,25 @@ union ceph_mds_request_args { } __attribute__ ((packed)) lookupino; } __attribute__ ((packed)); +union ceph_mds_request_args_ext { + union ceph_mds_request_args old; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr_ext; +}; + #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ -struct ceph_mds_request_head { +struct ceph_mds_request_head_old { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ __le32 flags; /* CEPH_MDS_FLAG_* */ @@ -463,6 +477,22 @@ struct ceph_mds_request_head { union ceph_mds_request_args args; } __attribute__ ((packed)); +#define CEPH_MDS_REQUEST_HEAD_VERSION 1 + +struct ceph_mds_request_head { + __le16 version; /* struct version */ + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* count retry, fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args_ext args; +} __attribute__ ((packed)); + /* cap/lease release record */ struct ceph_mds_request_release { __le64 ino, cap_id; /* ino and unique cap id */ -- cgit v1.2.3 From 418af5b3bfc4f1ef4854e83c5be8a0bdce51e95c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 29 Oct 2020 14:49:10 +0100 Subject: libceph: lower exponential backoff delay The current setting allows the backoff to climb up to 5 minutes. This is too high -- it becomes hard to tell whether the client is stuck on something or just in backoff. In userspace, ms_max_backoff is defaulted to 15 seconds. Let's do the same. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 60b324efd1c4..b47c7cc4c90a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -241,8 +241,8 @@ struct ceph_msg { }; /* ceph connection fault delay defaults, for exponential backoff */ -#define BASE_DELAY_INTERVAL (HZ/2) -#define MAX_DELAY_INTERVAL (5 * 60 * HZ) +#define BASE_DELAY_INTERVAL (HZ / 4) +#define MAX_DELAY_INTERVAL (15 * HZ) /* * A single connection with another host. -- cgit v1.2.3 From 5cd8da3a1ca2160b8f9c2ff6a96762e66410ea38 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 13 Oct 2020 17:23:22 +0200 Subject: libceph: drop msg->ack_stamp field It is set in process_ack() but never used. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b47c7cc4c90a..6f77e70db855 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -235,7 +235,6 @@ struct ceph_msg { bool more_to_follow; bool needs_out_seq; int front_alloc_len; - unsigned long ack_stamp; /* tx: when we were acked */ struct ceph_msgpool *pool; }; -- cgit v1.2.3 From 30be780a87211de75b93935c20a0913e46744a3f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:11:26 +0100 Subject: libceph: make con->state an int unsigned long is a leftover from when con->state used to be a set of bits managed with set_bit(), clear_bit(), etc. Save a bit of memory. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 6f77e70db855..f053de4f46dd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -257,13 +257,13 @@ struct ceph_connection { struct ceph_messenger *msgr; + int state; atomic_t sock_state; struct socket *sock; struct ceph_entity_addr peer_addr; /* peer address */ struct ceph_entity_addr peer_addr_for_me; unsigned long flags; - unsigned long state; const char *error_msg; /* error message, if any */ struct ceph_entity_name peer_name; /* peer name */ -- cgit v1.2.3 From 6d7f62bfb5b5da6b0b37174c1fd32545f3b5b90d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:59:02 +0100 Subject: libceph: rename and export con->state states In preparation for msgr2, rename msgr1 specific states and move the defines to the header file. Also drop state transition comments. They don't cover all possible transitions (e.g. NEGOTIATING -> STANDBY, etc) and currently do more harm than good. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index f053de4f46dd..e6be85b67c6c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -239,6 +239,16 @@ struct ceph_msg { struct ceph_msgpool *pool; }; +/* + * connection states + */ +#define CEPH_CON_S_CLOSED 1 +#define CEPH_CON_S_PREOPEN 2 +#define CEPH_CON_S_V1_BANNER 3 +#define CEPH_CON_S_V1_CONNECT_MSG 4 +#define CEPH_CON_S_OPEN 5 +#define CEPH_CON_S_STANDBY 6 + /* ceph connection fault delay defaults, for exponential backoff */ #define BASE_DELAY_INTERVAL (HZ / 4) #define MAX_DELAY_INTERVAL (15 * HZ) @@ -257,7 +267,7 @@ struct ceph_connection { struct ceph_messenger *msgr; - int state; + int state; /* CEPH_CON_S_* */ atomic_t sock_state; struct socket *sock; struct ceph_entity_addr peer_addr; /* peer address */ -- cgit v1.2.3 From 3fefd43e741a5b8d55aeb9115ff488ad2cad439b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:56:36 +0100 Subject: libceph: rename and export con->flags bits In preparation for msgr2, move the defines to the header file. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e6be85b67c6c..b6962a0fd76f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -249,6 +249,17 @@ struct ceph_msg { #define CEPH_CON_S_OPEN 5 #define CEPH_CON_S_STANDBY 6 +/* + * ceph_connection flag bits + */ +#define CEPH_CON_F_LOSSYTX 0 /* we can close channel or drop + messages on errors */ +#define CEPH_CON_F_KEEPALIVE_PENDING 1 /* we need to send a keepalive */ +#define CEPH_CON_F_WRITE_PENDING 2 /* we have data ready to send */ +#define CEPH_CON_F_SOCK_CLOSED 3 /* socket state changed to closed */ +#define CEPH_CON_F_BACKOFF 4 /* need to retry queuing delayed + work */ + /* ceph connection fault delay defaults, for exponential backoff */ #define BASE_DELAY_INTERVAL (HZ / 4) #define MAX_DELAY_INTERVAL (15 * HZ) @@ -273,7 +284,7 @@ struct ceph_connection { struct ceph_entity_addr peer_addr; /* peer address */ struct ceph_entity_addr peer_addr_for_me; - unsigned long flags; + unsigned long flags; /* CEPH_CON_F_* */ const char *error_msg; /* error message, if any */ struct ceph_entity_name peer_name; /* peer name */ -- cgit v1.2.3 From 699921d9e68ff3d9f8645488c12f4689c6533d70 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 14:37:06 +0100 Subject: libceph: export zero_page In preparation for msgr2, make zero_page global. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b6962a0fd76f..513ed5f90bff 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -343,6 +343,7 @@ struct ceph_connection { unsigned long delay; /* current delay interval */ }; +extern struct page *ceph_zero_page; extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); -- cgit v1.2.3 From 6503e0b69c9d4d78b5450db01e79328f8ed4ef21 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Nov 2020 16:29:47 +0100 Subject: libceph: export remaining protocol independent infrastructure In preparation for msgr2, make all protocol independent functions in messenger.c global. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 513ed5f90bff..93815f1a42b5 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -345,13 +345,50 @@ struct ceph_connection { extern struct page *ceph_zero_page; +void ceph_con_flag_clear(struct ceph_connection *con, unsigned long con_flag); +void ceph_con_flag_set(struct ceph_connection *con, unsigned long con_flag); +bool ceph_con_flag_test(struct ceph_connection *con, unsigned long con_flag); +bool ceph_con_flag_test_and_clear(struct ceph_connection *con, + unsigned long con_flag); +bool ceph_con_flag_test_and_set(struct ceph_connection *con, + unsigned long con_flag); + +void ceph_encode_my_addr(struct ceph_messenger *msgr); + +int ceph_tcp_connect(struct ceph_connection *con); +int ceph_con_close_socket(struct ceph_connection *con); +void ceph_con_reset_session(struct ceph_connection *con); + +u32 ceph_get_global_seq(struct ceph_messenger *msgr, u32 gt); +void ceph_con_discard_sent(struct ceph_connection *con, u64 ack_seq); +void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq); + +void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, + struct ceph_msg *msg, size_t length); +struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length, + bool *last_piece); +void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes); + +u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset, + unsigned int length); + +bool ceph_addr_is_blank(const struct ceph_entity_addr *addr); +int ceph_addr_port(const struct ceph_entity_addr *addr); +void ceph_addr_set_port(struct ceph_entity_addr *addr, int p); + +void ceph_con_process_message(struct ceph_connection *con); +int ceph_con_in_msg_alloc(struct ceph_connection *con, + struct ceph_msg_header *hdr, int *skip); +void ceph_con_get_out_msg(struct ceph_connection *con); + + extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); extern int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, int max_count, int *count); - extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); extern void ceph_msgr_flush(void); -- cgit v1.2.3 From 566050e17e53db283d4e26b73b4b50556f97ce7b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 12 Nov 2020 12:55:39 +0100 Subject: libceph: separate msgr1 protocol implementation In preparation for msgr2, define internal messenger <-> protocol interface (as opposed to external messenger <-> client interface, which is struct ceph_connection_operations) consisting of try_read(), try_write(), revoke(), revoke_incoming(), opened(), reset_session() and reset_protocol() ops. The semantics are exactly the same as they are now. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 93815f1a42b5..8cc8b08eb3dd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -382,6 +382,14 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); void ceph_con_get_out_msg(struct ceph_connection *con); +int ceph_con_v1_try_read(struct ceph_connection *con); +int ceph_con_v1_try_write(struct ceph_connection *con); +void ceph_con_v1_revoke(struct ceph_connection *con); +void ceph_con_v1_revoke_incoming(struct ceph_connection *con); +bool ceph_con_v1_opened(struct ceph_connection *con); +void ceph_con_v1_reset_session(struct ceph_connection *con); +void ceph_con_v1_reset_protocol(struct ceph_connection *con); + extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); -- cgit v1.2.3 From 2f713615ddd9d805b6c5e79c52e0e11af99d2bf1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 12 Nov 2020 15:48:06 +0100 Subject: libceph: move msgr1 protocol implementation to its own file A pure move, no other changes. Note that ceph_tcp_recv{msg,page}() and ceph_tcp_send{msg,page}() helpers are also moved. msgr2 will bring its own, more efficient, variants based on iov_iter. Switching msgr1 to them was considered but decided against to avoid subtle regressions. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 8cc8b08eb3dd..b5268127c55e 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -382,6 +382,7 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); void ceph_con_get_out_msg(struct ceph_connection *con); +/* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); int ceph_con_v1_try_write(struct ceph_connection *con); void ceph_con_v1_revoke(struct ceph_connection *con); -- cgit v1.2.3 From a56dd9bf47220c3206f27075af8bdfb219a2a3cf Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 12 Nov 2020 16:31:41 +0100 Subject: libceph: move msgr1 protocol specific fields to its own struct A couple whitespace fixups, no functional changes. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 76 +++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b5268127c55e..54a64e8dfce6 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -264,6 +264,43 @@ struct ceph_msg { #define BASE_DELAY_INTERVAL (HZ / 4) #define MAX_DELAY_INTERVAL (15 * HZ) +struct ceph_connection_v1_info { + struct kvec out_kvec[8], /* sending header/footer data */ + *out_kvec_cur; + int out_kvec_left; /* kvec's left in out_kvec */ + int out_skip; /* skip this many bytes */ + int out_kvec_bytes; /* total bytes left */ + bool out_more; /* there is more data after the kvecs */ + bool out_msg_done; + + struct ceph_auth_handshake *auth; + int auth_retry; /* true if we need a newer authorizer */ + + /* connection negotiation temps */ + u8 in_banner[CEPH_BANNER_MAX_LEN]; + struct ceph_entity_addr actual_peer_addr; + struct ceph_entity_addr peer_addr_for_me; + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; + + int in_base_pos; /* bytes read */ + + /* message in temps */ + u8 in_tag; /* protocol control byte */ + struct ceph_msg_header in_hdr; + __le64 in_temp_ack; /* for reading an ack */ + + /* message out temps */ + struct ceph_msg_header out_hdr; + __le64 out_temp_ack; /* for writing an ack */ + struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 + stamp */ + + u32 connect_seq; /* identify the most recent connection + attempt for this session */ + u32 peer_global_seq; /* peer's global seq for this connection */ +}; + /* * A single connection with another host. * @@ -281,21 +318,13 @@ struct ceph_connection { int state; /* CEPH_CON_S_* */ atomic_t sock_state; struct socket *sock; - struct ceph_entity_addr peer_addr; /* peer address */ - struct ceph_entity_addr peer_addr_for_me; unsigned long flags; /* CEPH_CON_F_* */ const char *error_msg; /* error message, if any */ struct ceph_entity_name peer_name; /* peer name */ - + struct ceph_entity_addr peer_addr; /* peer address */ u64 peer_features; - u32 connect_seq; /* identify the most recent connection - attempt for this connection, client */ - u32 peer_global_seq; /* peer's global seq for this connection */ - - struct ceph_auth_handshake *auth; - int auth_retry; /* true if we need a newer authorizer */ struct mutex mutex; @@ -306,41 +335,18 @@ struct ceph_connection { u64 in_seq, in_seq_acked; /* last message received, acked */ - /* connection negotiation temps */ - char in_banner[CEPH_BANNER_MAX_LEN]; - struct ceph_msg_connect out_connect; - struct ceph_msg_connect_reply in_reply; - struct ceph_entity_addr actual_peer_addr; - - /* message out temps */ - struct ceph_msg_header out_hdr; + struct ceph_msg *in_msg; struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ - bool out_msg_done; - - struct kvec out_kvec[8], /* sending header/footer data */ - *out_kvec_cur; - int out_kvec_left; /* kvec's left in out_kvec */ - int out_skip; /* skip this many bytes */ - int out_kvec_bytes; /* total bytes left */ - int out_more; /* there is more data after the kvecs */ - __le64 out_temp_ack; /* for writing an ack */ - struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 - stamp */ - /* message in temps */ - struct ceph_msg_header in_hdr; - struct ceph_msg *in_msg; u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ - char in_tag; /* protocol control byte */ - int in_base_pos; /* bytes read */ - __le64 in_temp_ack; /* for reading an ack */ - struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ + + struct ceph_connection_v1_info v1; }; extern struct page *ceph_zero_page; -- cgit v1.2.3 From 285ea34fc876aa0a2c5e65d310c4a41269e2e5f2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2020 16:47:20 +0100 Subject: libceph, ceph: incorporate nautilus cephx changes - request service tickets together with auth ticket. Currently we get auth ticket via CEPHX_GET_AUTH_SESSION_KEY op and then request service tickets via CEPHX_GET_PRINCIPAL_SESSION_KEY op in a separate message. Since nautilus, desired service tickets are shared togther with auth ticket in CEPHX_GET_AUTH_SESSION_KEY reply. - propagate session key and connection secret, if any. In preparation for msgr2, update handle_reply() and verify_authorizer_reply() auth ops to propagate session key and connection secret. Since nautilus, if secure mode is negotiated, connection secret is shared either in CEPHX_GET_AUTH_SESSION_KEY reply (for mons) or in a final authorizer reply (for osds and mdses). Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 6728c2ee0205..d9e7d0bcdaf1 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -53,7 +53,9 @@ struct ceph_auth_client_ops { */ int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); int (*handle_reply)(struct ceph_auth_client *ac, int result, - void *buf, void *end); + void *buf, void *end, u8 *session_key, + int *session_key_len, u8 *con_secret, + int *con_secret_len); /* * Create authorizer for connecting to a service, and verify @@ -69,7 +71,10 @@ struct ceph_auth_client_ops { void *challenge_buf, int challenge_buf_len); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, - struct ceph_authorizer *a); + struct ceph_authorizer *a, + void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); void (*invalidate_authorizer)(struct ceph_auth_client *ac, int peer_type); @@ -126,8 +131,11 @@ int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, struct ceph_authorizer *a, void *challenge_buf, int challenge_buf_len); -extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, - struct ceph_authorizer *a); +int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type); -- cgit v1.2.3 From 59711f9ec219bf5245a8e95989803fb503adc52d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2020 17:01:53 +0100 Subject: libceph: amend cephx init_protocol() and build_request() In msgr2, initial authentication happens with an exchange of msgr2 control frames -- MAuth message and struct ceph_mon_request_header aren't used. Make that optional. Stop reporting cephx protocol as "x". Use "cephx" instead. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d44d98033d58..6d986e52000b 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -95,6 +95,7 @@ struct ceph_dir_layout { #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) +const char *ceph_auth_proto_name(int proto); /********************************************* * message layer -- cgit v1.2.3 From c1c0ce78f479cf4d7dfe72c4c1cabbf0bc0730c9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2020 17:05:44 +0100 Subject: libceph: drop ac->ops->name field Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index d9e7d0bcdaf1..5f64f66309fa 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -32,8 +32,6 @@ struct ceph_auth_handshake { }; struct ceph_auth_client_ops { - const char *name; - /* * true if we are authenticated and can connect to * services. -- cgit v1.2.3 From a5cbd5fc22d5043a8a76e15d75d031fe24d1f69c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 30 Oct 2020 13:30:51 +0100 Subject: libceph, ceph: get and handle cluster maps with addrvecs In preparation for msgr2, make the cluster send us maps with addrvecs including both LEGACY and MSGR2 addrs instead of a single LEGACY addr. This means advertising support for SERVER_NAUTILUS and also some older features: SERVER_MIMIC, MONENC and MONNAMES. MONNAMES and MONENC are actually pre-argonaut, we just never updated ceph_monmap_decode() for them. Decoding is unconditional, see commit 23c625ce3065 ("libceph: assume argonaut on the server side"). SERVER_MIMIC doesn't bear any meaning for the kernel client. Since ceph_decode_entity_addrvec() is guarded by encoding version checks (and in msgr2 case it is guarded implicitly by the fact that server is speaking msgr2), we assume MSG_ADDR2 for it. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 11 ++++++++--- include/linux/ceph/decode.h | 4 ++++ include/linux/ceph/mdsmap.h | 2 +- include/linux/ceph/osdmap.h | 4 ++-- 4 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 999636d53cf2..3a47acd9cc14 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -8,7 +8,8 @@ * feature. Base case is 1 (first use). */ #define CEPH_FEATURE_INCARNATION_1 (0ull) -#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL +#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL +#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL< Date: Wed, 25 Nov 2020 14:41:59 +0100 Subject: libceph, rbd: ignore addr->type while comparing in some cases For libceph, this ensures that libceph instance sharing (share option) continues to work. For rbd, this avoids blocklisting alive lock owners (locker addr is always LEGACY, while watcher addr is ANY in nautilus). Signed-off-by: Ilya Dryomov --- include/linux/ceph/msgr.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 46939485f2c3..9a897a60f20b 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -52,11 +52,18 @@ extern const char *ceph_entity_type_name(int type); * entity_addr -- network address */ struct ceph_entity_addr { - __le32 type; + __le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */ __le32 nonce; /* unique id for process (e.g. pid) */ struct sockaddr_storage in_addr; } __attribute__ ((packed)); +static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs, + const struct ceph_entity_addr *rhs) +{ + return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) && + lhs->nonce == rhs->nonce; +} + struct ceph_entity_inst { struct ceph_entity_name name; struct ceph_entity_addr addr; -- cgit v1.2.3 From 00498b994113a871a556f7ff24a4cf8a00611700 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Nov 2020 16:04:58 +0100 Subject: libceph: introduce connection modes and ms_mode option msgr2 supports two connection modes: crc (plain) and secure (on-wire encryption). Connection mode is picked by server based on input from client. Introduce ms_mode option: ms_mode=legacy - msgr1 (default) ms_mode=crc - crc mode, if denied fail ms_mode=secure - secure mode, if denied fail ms_mode=prefer-crc - crc mode, if denied agree to secure mode ms_mode=prefer-secure - secure mode, if denied agree to crc mode ms_mode affects all connections, we don't separate connections to mons like it's done in userspace with ms_client_mode vs ms_mon_client_mode. For now the default is legacy, to be flipped to prefer-crc after some time. Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 8 ++++++-- include/linux/ceph/ceph_fs.h | 6 ++++++ include/linux/ceph/libceph.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 5f64f66309fa..6fc058fe9efa 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -98,11 +98,15 @@ struct ceph_auth_client { const struct ceph_crypto_key *key; /* our secret key */ unsigned want_keys; /* which services we want */ + int preferred_mode; /* CEPH_CON_MODE_* */ + int fallback_mode; /* ditto */ + struct mutex mutex; }; -extern struct ceph_auth_client *ceph_auth_init(const char *name, - const struct ceph_crypto_key *key); +struct ceph_auth_client *ceph_auth_init(const char *name, + const struct ceph_crypto_key *key, + const int *con_modes); extern void ceph_auth_destroy(struct ceph_auth_client *ac); extern void ceph_auth_reset(struct ceph_auth_client *ac); diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 6d986e52000b..ce22d5469670 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -93,9 +93,15 @@ struct ceph_dir_layout { #define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_CEPHX 0x2 +/* msgr2 protocol modes */ +#define CEPH_CON_MODE_UNKNOWN 0x0 +#define CEPH_CON_MODE_CRC 0x1 +#define CEPH_CON_MODE_SECURE 0x2 + #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) const char *ceph_auth_proto_name(int proto); +const char *ceph_con_mode_name(int mode); /********************************************* * message layer diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index eb5a7ca13f9c..8765a5ad267a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -53,6 +53,7 @@ struct ceph_options { unsigned long osd_keepalive_timeout; /* jiffies */ unsigned long osd_request_timeout; /* jiffies */ u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */ + int con_modes[2]; /* CEPH_CON_MODE_* */ /* * any type that can't be simply compared or doesn't need -- cgit v1.2.3 From cd1a677cad994021b19665ed476aea63f5d54f31 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Nov 2020 16:59:08 +0100 Subject: libceph, ceph: implement msgr2.1 protocol (crc and secure modes) Implement msgr2.1 wire protocol, available since nautilus 14.2.11 and octopus 15.2.5. msgr2.0 wire protocol is not implemented -- it has several security, integrity and robustness issues and therefore considered deprecated. Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 36 ++++++++++- include/linux/ceph/ceph_fs.h | 4 ++ include/linux/ceph/decode.h | 4 ++ include/linux/ceph/libceph.h | 9 ++- include/linux/ceph/messenger.h | 136 ++++++++++++++++++++++++++++++++++++++++- include/linux/ceph/msgr.h | 48 +++++++++++++++ 6 files changed, 231 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 6fc058fe9efa..3fbe72ebd779 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -120,8 +120,12 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end); extern int ceph_build_auth(struct ceph_auth_client *ac, void *msg_buf, size_t msg_len); - extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); + +int __ceph_auth_get_authorizer(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + int peer_type, bool force_new, + int *proto, int *pref_mode, int *fallb_mode); extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); @@ -157,4 +161,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth, return auth->check_message_signature(auth, msg); return 0; } + +int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len); +int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply, + int reply_len, void *buf, int buf_len); +int ceph_auth_handle_reply_done(struct ceph_auth_client *ac, + u64 global_id, void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); +bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac, + int used_proto, int result, + const int *allowed_protos, int proto_cnt, + const int *allowed_modes, int mode_cnt); + +int ceph_auth_get_authorizer(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + int peer_type, void *buf, int *buf_len); +int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + void *reply, int reply_len, + void *buf, int *buf_len); +int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac, + struct ceph_auth_handshake *auth, + void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); +bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac, + int peer_type, int used_proto, int result, + const int *allowed_protos, int proto_cnt, + const int *allowed_modes, int mode_cnt); + #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ce22d5469670..e41a811026f6 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -93,6 +93,10 @@ struct ceph_dir_layout { #define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_CEPHX 0x2 +#define CEPH_AUTH_MODE_NONE 0 +#define CEPH_AUTH_MODE_AUTHORIZER 1 +#define CEPH_AUTH_MODE_MON 10 + /* msgr2 protocol modes */ #define CEPH_CON_MODE_UNKNOWN 0x0 #define CEPH_CON_MODE_CRC 0x1 diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 9a934e04f841..04f3ace5787b 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -221,6 +221,7 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv, #define CEPH_ENTITY_ADDR_TYPE_NONE 0 #define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1) #define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2) +#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3) static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a) { @@ -243,6 +244,9 @@ extern int ceph_decode_entity_addr(void **p, void *end, int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2, struct ceph_entity_addr *addr); +int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr); +void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr); + /* * encoders */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 8765a5ad267a..eb9008bb3992 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -31,10 +31,10 @@ #define CEPH_OPT_FSID (1<<0) #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ -#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ +#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */ #define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */ #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ -#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */ +#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */ #define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) @@ -84,6 +84,7 @@ struct ceph_options { #define CEPH_MONC_HUNT_BACKOFF 2 #define CEPH_MONC_HUNT_MAX_MULT 10 +#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -152,6 +153,10 @@ struct ceph_client { #define from_msgr(ms) container_of(ms, struct ceph_client, msgr) +static inline bool ceph_msgr2(struct ceph_client *client) +{ + return client->options->con_modes[0] != CEPH_CON_MODE_UNKNOWN; +} /* * snapshots diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 54a64e8dfce6..0e6e9ad3c3bf 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -3,6 +3,7 @@ #define __FS_CEPH_MESSENGER_H #include +#include #include #include #include @@ -52,6 +53,23 @@ struct ceph_connection_operations { int (*sign_message) (struct ceph_msg *msg); int (*check_message_signature) (struct ceph_msg *msg); + + /* msgr2 authentication exchange */ + int (*get_auth_request)(struct ceph_connection *con, + void *buf, int *buf_len, + void **authorizer, int *authorizer_len); + int (*handle_auth_reply_more)(struct ceph_connection *con, + void *reply, int reply_len, + void *buf, int *buf_len, + void **authorizer, int *authorizer_len); + int (*handle_auth_done)(struct ceph_connection *con, + u64 global_id, void *reply, int reply_len, + u8 *session_key, int *session_key_len, + u8 *con_secret, int *con_secret_len); + int (*handle_auth_bad_method)(struct ceph_connection *con, + int used_proto, int result, + const int *allowed_protos, int proto_cnt, + const int *allowed_modes, int mode_cnt); }; /* use format string %s%lld */ @@ -246,8 +264,15 @@ struct ceph_msg { #define CEPH_CON_S_PREOPEN 2 #define CEPH_CON_S_V1_BANNER 3 #define CEPH_CON_S_V1_CONNECT_MSG 4 -#define CEPH_CON_S_OPEN 5 -#define CEPH_CON_S_STANDBY 6 +#define CEPH_CON_S_V2_BANNER_PREFIX 5 +#define CEPH_CON_S_V2_BANNER_PAYLOAD 6 +#define CEPH_CON_S_V2_HELLO 7 +#define CEPH_CON_S_V2_AUTH 8 +#define CEPH_CON_S_V2_AUTH_SIGNATURE 9 +#define CEPH_CON_S_V2_SESSION_CONNECT 10 +#define CEPH_CON_S_V2_SESSION_RECONNECT 11 +#define CEPH_CON_S_OPEN 12 +#define CEPH_CON_S_STANDBY 13 /* * ceph_connection flag bits @@ -301,6 +326,99 @@ struct ceph_connection_v1_info { u32 peer_global_seq; /* peer's global seq for this connection */ }; +#define CEPH_CRC_LEN 4 +#define CEPH_GCM_KEY_LEN 16 +#define CEPH_GCM_IV_LEN sizeof(struct ceph_gcm_nonce) +#define CEPH_GCM_BLOCK_LEN 16 +#define CEPH_GCM_TAG_LEN 16 + +#define CEPH_PREAMBLE_LEN 32 +#define CEPH_PREAMBLE_INLINE_LEN 48 +#define CEPH_PREAMBLE_PLAIN_LEN CEPH_PREAMBLE_LEN +#define CEPH_PREAMBLE_SECURE_LEN (CEPH_PREAMBLE_LEN + \ + CEPH_PREAMBLE_INLINE_LEN + \ + CEPH_GCM_TAG_LEN) +#define CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN) +#define CEPH_EPILOGUE_SECURE_LEN (CEPH_GCM_BLOCK_LEN + CEPH_GCM_TAG_LEN) + +#define CEPH_FRAME_MAX_SEGMENT_COUNT 4 + +struct ceph_frame_desc { + int fd_tag; /* FRAME_TAG_* */ + int fd_seg_cnt; + int fd_lens[CEPH_FRAME_MAX_SEGMENT_COUNT]; /* logical */ + int fd_aligns[CEPH_FRAME_MAX_SEGMENT_COUNT]; +}; + +struct ceph_gcm_nonce { + __le32 fixed; + __le64 counter __packed; +}; + +struct ceph_connection_v2_info { + struct iov_iter in_iter; + struct kvec in_kvecs[5]; /* recvmsg */ + struct bio_vec in_bvec; /* recvmsg (in_cursor) */ + int in_kvec_cnt; + int in_state; /* IN_S_* */ + + struct iov_iter out_iter; + struct kvec out_kvecs[8]; /* sendmsg */ + struct bio_vec out_bvec; /* sendpage (out_cursor, out_zero), + sendmsg (out_enc_pages) */ + int out_kvec_cnt; + int out_state; /* OUT_S_* */ + + int out_zero; /* # of zero bytes to send */ + bool out_iter_sendpage; /* use sendpage if possible */ + + struct ceph_frame_desc in_desc; + struct ceph_msg_data_cursor in_cursor; + struct ceph_msg_data_cursor out_cursor; + + struct crypto_shash *hmac_tfm; /* post-auth signature */ + struct crypto_aead *gcm_tfm; /* on-wire encryption */ + struct aead_request *gcm_req; + struct crypto_wait gcm_wait; + struct ceph_gcm_nonce in_gcm_nonce; + struct ceph_gcm_nonce out_gcm_nonce; + + struct page **out_enc_pages; + int out_enc_page_cnt; + int out_enc_resid; + int out_enc_i; + + int con_mode; /* CEPH_CON_MODE_* */ + + void *conn_bufs[16]; + int conn_buf_cnt; + + struct kvec in_sign_kvecs[8]; + struct kvec out_sign_kvecs[8]; + int in_sign_kvec_cnt; + int out_sign_kvec_cnt; + + u64 client_cookie; + u64 server_cookie; + u64 global_seq; + u64 connect_seq; + u64 peer_global_seq; + + u8 in_buf[CEPH_PREAMBLE_SECURE_LEN]; + u8 out_buf[CEPH_PREAMBLE_SECURE_LEN]; + struct { + u8 late_status; /* FRAME_LATE_STATUS_* */ + union { + struct { + u32 front_crc; + u32 middle_crc; + u32 data_crc; + } __packed; + u8 pad[CEPH_GCM_BLOCK_LEN - 1]; + }; + } out_epil; +}; + /* * A single connection with another host. * @@ -346,7 +464,10 @@ struct ceph_connection { struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ - struct ceph_connection_v1_info v1; + union { + struct ceph_connection_v1_info v1; + struct ceph_connection_v2_info v2; + }; }; extern struct page *ceph_zero_page; @@ -397,6 +518,15 @@ bool ceph_con_v1_opened(struct ceph_connection *con); void ceph_con_v1_reset_session(struct ceph_connection *con); void ceph_con_v1_reset_protocol(struct ceph_connection *con); +/* messenger_v2.c */ +int ceph_con_v2_try_read(struct ceph_connection *con); +int ceph_con_v2_try_write(struct ceph_connection *con); +void ceph_con_v2_revoke(struct ceph_connection *con); +void ceph_con_v2_revoke_incoming(struct ceph_connection *con); +bool ceph_con_v2_opened(struct ceph_connection *con); +void ceph_con_v2_reset_session(struct ceph_connection *con); +void ceph_con_v2_reset_protocol(struct ceph_connection *con); + extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 9a897a60f20b..f5e02f6c0655 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -14,9 +14,39 @@ * constant. */ #define CEPH_BANNER "ceph v027" +#define CEPH_BANNER_LEN 9 #define CEPH_BANNER_MAX_LEN 30 +/* + * messenger V2 connection banner prefix. + * The full banner string should have the form: "ceph v2\n" + * the 2 bytes are the length of the remaining banner. + */ +#define CEPH_BANNER_V2 "ceph v2\n" +#define CEPH_BANNER_V2_LEN 8 +#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16)) + +/* + * messenger V2 features + */ +#define CEPH_MSGR2_INCARNATION_1 (0ull) + +#define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \ + static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ + static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \ + (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation); + +#define HAVE_MSGR2_FEATURE(x, name) \ + (((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name)) + +DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1 + +#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) + +#define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) + + /* * Rollover-safe type and comparator for 32-bit sequence numbers. * Comparator returns -1, 0, or 1. @@ -158,6 +188,24 @@ struct ceph_msg_header { __le32 crc; /* header crc32c */ } __attribute__ ((packed)); +struct ceph_msg_header2 { + __le64 seq; /* message seq# for this session */ + __le64 tid; /* transaction id */ + __le16 type; /* message type */ + __le16 priority; /* priority. higher value == higher priority */ + __le16 version; /* version of message encoding */ + + __le32 data_pre_padding_len; + __le16 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + + __le64 ack_seq; + __u8 flags; + /* oldest code we think can decode this. unknown if zero. */ + __le16 compat_version; + __le16 reserved; +} __attribute__ ((packed)); + #define CEPH_MSG_PRIO_LOW 64 #define CEPH_MSG_PRIO_DEFAULT 127 #define CEPH_MSG_PRIO_HIGH 196 -- cgit v1.2.3 From 2f0df6cfa325d7106b8a65bc0e02db1086e3f73b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Nov 2020 20:00:10 +0100 Subject: libceph: drop ceph_auth_{create,update}_authorizer() Signed-off-by: Ilya Dryomov --- include/linux/ceph/auth.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 3fbe72ebd779..71b5d481c653 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -126,13 +126,7 @@ int __ceph_auth_get_authorizer(struct ceph_auth_client *ac, struct ceph_auth_handshake *auth, int peer_type, bool force_new, int *proto, int *pref_mode, int *fallb_mode); -extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, - int peer_type, - struct ceph_auth_handshake *auth); void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); -extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, - int peer_type, - struct ceph_auth_handshake *a); int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, struct ceph_authorizer *a, void *challenge_buf, -- cgit v1.2.3 From 1b04fa9900263b4e217ca2509fd778b32c2b4eb2 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 9 Dec 2020 21:27:31 +0100 Subject: rcu-tasks: Move RCU-tasks initialization to before early_initcall() PowerPC testing encountered boot failures due to RCU Tasks not being fully initialized until core_initcall() time. This commit therefore initializes RCU Tasks (along with Rude RCU and RCU Tasks Trace) just before early_initcall() time, thus allowing waiting on RCU Tasks grace periods from early_initcall() handlers. Link: https://lore.kernel.org/rcu/87eekfh80a.fsf@dja-thinkpad.axtens.net/ Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall") Tested-by: Daniel Axtens Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdd0152c253..5c119d6cecf1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -86,6 +86,12 @@ void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); +#ifdef CONFIG_TASKS_RCU_GENERIC +void rcu_init_tasks_generic(void); +#else +static inline void rcu_init_tasks_generic(void) { } +#endif + #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); -- cgit v1.2.3 From be98e05a67f05ff4c8349a51fcec993a28be718c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 4 Dec 2020 21:02:42 +0100 Subject: dma-buf: Fix kerneldoc formatting I wanted to look up something and noticed the hyperlink doesn't work. While fixing that also noticed a trivial kerneldoc comment typo in the same section, fix that too. Reviewed-by: Michael J. Ruhl Reviewed-by: Simon Ser Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201204200242.2671481-1-daniel.vetter@ffwll.ch --- include/linux/dma-buf-map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h index 583a3a1f9447..278d489e4bdd 100644 --- a/include/linux/dma-buf-map.h +++ b/include/linux/dma-buf-map.h @@ -122,7 +122,7 @@ struct dma_buf_map { /** * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory - * @vaddr: A system-memory address + * @vaddr_: A system-memory address */ #define DMA_BUF_MAP_INIT_VADDR(vaddr_) \ { \ -- cgit v1.2.3 From a313357e704f2617f298333e3e617a38b1719760 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:37 +0100 Subject: genirq: Move irq_has_action() into core code This function uses irq_to_desc() and is going to be used by modules to replace the open coded irq_to_desc() (ab)usage. The final goal is to remove the export of irq_to_desc() so driver cannot fiddle with it anymore. Move it into the core code and fixup the usage sites to include the proper header. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.548936472@linutronix.de --- include/linux/interrupt.h | 1 + include/linux/irqdesc.h | 7 +------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 870b3251e174..bb8ff9083e7d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -232,6 +232,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); # define local_irq_enable_in_hardirq() local_irq_enable() #endif +bool irq_has_action(unsigned int irq); extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5745491303e0..385a4fafe631 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -179,12 +179,7 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, /* Test to see if a driver has successfully requested an irq */ static inline int irq_desc_has_action(struct irq_desc *desc) { - return desc->action != NULL; -} - -static inline int irq_has_action(unsigned int irq) -{ - return irq_desc_has_action(irq_to_desc(irq)); + return desc && desc->action != NULL; } /** -- cgit v1.2.3 From fdd029630434b434b127efc7fba337da28f45658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:38 +0100 Subject: genirq: Move status flag checks to core These checks are used by modules and prevent the removal of the export of irq_to_desc(). Move the accessor into the core. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.703779349@linutronix.de --- include/linux/irqdesc.h | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 385a4fafe631..308d7db8991f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -223,28 +223,21 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, data->chip = chip; } +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); + static inline bool irq_balancing_disabled(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; + return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK); } static inline bool irq_is_percpu(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU; + return irq_check_status_bit(irq, IRQ_PER_CPU); } static inline bool irq_is_percpu_devid(unsigned int irq) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); - return desc->status_use_accessors & IRQ_PER_CPU_DEVID; + return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } static inline void -- cgit v1.2.3 From f1c6306c0d6b50844ba02c8a53e35405e9c0db05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:39 +0100 Subject: genirq: Move irq_set_lockdep_class() to core irq_set_lockdep_class() is used from modules and requires irq_to_desc() to be exported. Move it into the core code which lifts another requirement for the export. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.860029489@linutronix.de --- include/linux/irqdesc.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 308d7db8991f..4a1d016716f4 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -240,16 +240,14 @@ static inline bool irq_is_percpu_devid(unsigned int irq) return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } +void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class); static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class) { - struct irq_desc *desc = irq_to_desc(irq); - - if (desc) { - lockdep_set_class(&desc->lock, lock_class); - lockdep_set_class(&desc->request_mutex, request_class); - } + if (IS_ENABLED(CONFIG_LOCKDEP)) + __irq_set_lockdep_class(irq, lock_class, request_class); } #endif -- cgit v1.2.3 From 3e2380123fb96987ce958f623207010c667ffa7c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:40 +0100 Subject: genirq: Provide irq_get_effective_affinity() Provide an accessor to the effective interrupt affinity mask. Going to be used to replace open coded fiddling with the irq descriptor. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194042.967177918@linutronix.de --- include/linux/irq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c332871d59da..4aeb1c4c7e07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -906,6 +906,13 @@ struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) } #endif +static inline struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + + return d ? irq_data_get_effective_affinity_mask(d) : NULL; +} + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, -- cgit v1.2.3 From 26c19d0a8610fb233b31730fe26a31145f2d9796 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:43 +0100 Subject: genirq: Make kstat_irqs() static No more users outside the core code. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194043.268774449@linutronix.de --- include/linux/kernel_stat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 89f0745c096d..44ae1a7eb9e3 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -67,7 +67,6 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ -extern unsigned int kstat_irqs(unsigned int irq); extern unsigned int kstat_irqs_usr(unsigned int irq); /* -- cgit v1.2.3 From 501e2db67fa4264b517de5c7934e94cca89b3a1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2020 20:25:44 +0100 Subject: genirq: Provide kstat_irqdesc_cpu() Most users of kstat_irqs_cpu() have the irq descriptor already. No point in calling into the core code and looking it up once more. Use it in per_cpu_count_show() to start with. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201210194043.362094758@linutronix.de --- include/linux/irqdesc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 4a1d016716f4..891b323266df 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -113,6 +113,12 @@ static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif +static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, + unsigned int cpu) +{ + return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; +} + static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { return container_of(data->common, struct irq_desc, irq_common_data); -- cgit v1.2.3 From ee2cc4276ba4909438f5894a218877660e1536d9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Dec 2020 21:08:00 +0100 Subject: cpufreq: Add special-purpose fast-switching callback for drivers First off, some cpufreq drivers (eg. intel_pstate) can pass hints beyond the current target frequency to the hardware and there are no provisions for doing that in the cpufreq framework. In particular, today the driver has to assume that it should not allow the frequency to fall below the one requested by the governor (or the required capacity may not be provided) which may not be the case and which may lead to excessive energy usage in some scenarios. Second, the hints passed by these drivers to the hardware need not be in terms of the frequency, so representing the utilization numbers coming from the scheduler as frequency before passing them to those drivers is not really useful. Address the two points above by adding a special-purpose replacement for the ->fast_switch callback, called ->adjust_perf, allowing the governor to pass abstract performance level (rather than frequency) values for the minimum (required) and target (desired) performance along with the CPU capacity to compare them to. Also update the schedutil governor to use the new callback instead of ->fast_switch if present and if the utilization mertics are frequency-invariant (that is requisite for the direct mapping between the utilization and the CPU performance levels to be a reasonable approximation). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 14 ++++++++++++++ include/linux/sched/cpufreq.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 584fccd4fcab..9c8b7437b6cd 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -320,6 +320,15 @@ struct cpufreq_driver { unsigned int index); unsigned int (*fast_switch)(struct cpufreq_policy *policy, unsigned int target_freq); + /* + * ->fast_switch() replacement for drivers that use an internal + * representation of performance levels and can pass hints other than + * the target performance level to the hardware. + */ + void (*adjust_perf)(unsigned int cpu, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity); /* * Caches and returns the lowest driver-supported frequency greater than @@ -588,6 +597,11 @@ struct cpufreq_governor { /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); +void cpufreq_driver_adjust_perf(unsigned int cpu, + unsigned long min_perf, + unsigned long target_perf, + unsigned long capacity); +bool cpufreq_driver_has_adjust_perf(void); int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 3ed5aa18593f..6205578ab6ee 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -28,6 +28,11 @@ static inline unsigned long map_util_freq(unsigned long util, { return (freq + (freq >> 2)) * util / cap; } + +static inline unsigned long map_util_perf(unsigned long util) +{ + return util + (util >> 2); +} #endif /* CONFIG_CPU_FREQ */ #endif /* _LINUX_SCHED_CPUFREQ_H */ -- cgit v1.2.3 From f0dbd2bd1c22c6670e83ddcd46a9beb8b575e86d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:03:55 -0800 Subject: mm: slab: provide krealloc_array() When allocating an array of elements, users should check for multiplication overflow or preferably use one of the provided helpers like: kmalloc_array(). There's no krealloc_array() counterpart but there are many users who use regular krealloc() to reallocate arrays. Let's provide an actual krealloc_array() implementation. While at it: add some documentation regarding krealloc. Link: https://lkml.kernel.org/r/20201109110654.12547-3-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Vlastimil Babka Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian Knig Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S . Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index dd6897f62010..be4ba5867ac5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -592,6 +592,24 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) return __kmalloc(bytes, flags); } +/** + * krealloc_array - reallocate memory for an array. + * @p: pointer to the memory chunk to reallocate + * @new_n: new number of elements to alloc + * @new_size: new size of a single member of the array + * @flags: the type of memory to allocate (see kmalloc) + */ +static __must_check inline void * +krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, flags); +} + /** * kcalloc - allocate memory for an array. The memory is set to zero. * @n: number of elements. -- cgit v1.2.3 From 7fb7ab6d618a4dc7ea3f3eafc92388a35b4f8894 Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Mon, 14 Dec 2020 19:04:46 -0800 Subject: mm: fix page_owner initializing issue for arm32 Page owner of pages used by page owner itself used is missing on arm32 targets. The reason is dummy_handle and failure_handle is not initialized correctly. Buddy allocator is used to initialize these two handles. However, buddy allocator is not ready when page owner calls it. This change fixed that by initializing page owner after buddy initialization. The working flow before and after this change are: original logic: 1. allocated memory for page_ext(using memblock). 2. invoke the init callback of page_ext_ops like page_owner(using buddy allocator). 3. initialize buddy. after this change: 1. allocated memory for page_ext(using memblock). 2. initialize buddy. 3. invoke the init callback of page_ext_ops like page_owner(using buddy allocator). with the change, failure/dummy_handle can get its correct value and page owner output for example has the one for page owner itself: Page allocated via order 2, mask 0x6202c0(GFP_USER|__GFP_NOWARN), pid 1006, ts 67278156558 ns PFN 543776 type Unmovable Block 531 type Unmovable Flags 0x0() init_page_owner+0x28/0x2f8 invoke_init_callbacks_flatmem+0x24/0x34 start_kernel+0x33c/0x5d8 Link: https://lkml.kernel.org/r/1603104925-5888-1-git-send-email-zhenhuah@codeaurora.org Signed-off-by: Zhenhua Huang Acked-by: Vlastimil Babka Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index cfce186f0c4e..aff81ba31bd8 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -44,8 +44,12 @@ static inline void page_ext_init_flatmem(void) { } extern void page_ext_init(void); +static inline void page_ext_init_flatmem_late(void) +{ +} #else extern void page_ext_init_flatmem(void); +extern void page_ext_init_flatmem_late(void); static inline void page_ext_init(void) { } @@ -76,6 +80,10 @@ static inline void page_ext_init(void) { } +static inline void page_ext_init_flatmem_late(void) +{ +} + static inline void page_ext_init_flatmem(void) { } -- cgit v1.2.3 From 57efa1fe5957694fa541c9062de0a127f0b9acb0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Dec 2020 19:05:44 -0800 Subject: mm/gup: prevent gup_fast from racing with COW during fork Since commit 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") pages under a FOLL_PIN will not be write protected during COW for fork. This means that pages returned from pin_user_pages(FOLL_WRITE) should not become write protected while the pin is active. However, there is a small race where get_user_pages_fast(FOLL_PIN) can establish a FOLL_PIN at the same time copy_present_page() is write protecting it: CPU 0 CPU 1 get_user_pages_fast() internal_get_user_pages_fast() copy_page_range() pte_alloc_map_lock() copy_present_page() atomic_read(has_pinned) == 0 page_maybe_dma_pinned() == false atomic_set(has_pinned, 1); gup_pgd_range() gup_pte_range() pte_t pte = gup_get_pte(ptep) pte_access_permitted(pte) try_grab_compound_head() pte = pte_wrprotect(pte) set_pte_at(); pte_unmap_unlock() // GUP now returns with a write protected page The first attempt to resolve this by using the write protect caused problems (and was missing a barrrier), see commit f3c64eda3e50 ("mm: avoid early COW write protect games during fork()") Instead wrap copy_p4d_range() with the write side of a seqcount and check the read side around gup_pgd_range(). If there is a collision then get_user_pages_fast() fails and falls back to slow GUP. Slow GUP is safe against this race because copy_page_range() is only called while holding the exclusive side of the mmap_lock on the src mm_struct. [akpm@linux-foundation.org: coding style fixes] Link: https://lore.kernel.org/r/CAHk-=wi=iCnYCARbPGjkVJu9eyYeZ13N64tZYLdOB8CP5Q_PLw@mail.gmail.com Link: https://lkml.kernel.org/r/2-v4-908497cf359a+4782-gup_fork_jgg@nvidia.com Fixes: f3c64eda3e50 ("mm: avoid early COW write protect games during fork()") Signed-off-by: Jason Gunthorpe Suggested-by: Linus Torvalds Reviewed-by: John Hubbard Reviewed-by: Jan Kara Reviewed-by: Peter Xu Acked-by: "Ahmed S. Darwish" [seqcount_t parts] Cc: Andrea Arcangeli Cc: "Aneesh Kumar K.V" Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Leon Romanovsky Cc: Michal Hocko Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5a9238f6caad..915f4f100383 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -446,6 +447,13 @@ struct mm_struct { */ atomic_t has_pinned; + /** + * @write_protect_seq: Locked when any thread is write + * protecting pages mapped by this mm to enforce a later COW, + * for instance during page table copying for fork(). + */ + seqcount_t write_protect_seq; + #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif -- cgit v1.2.3 From 52650c8b466bac399aec213c61d74bfe6f7af1a4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Dec 2020 19:05:48 -0800 Subject: mm/gup: remove the vma allocation from gup_longterm_locked() Long ago there wasn't a FOLL_LONGTERM flag so this DAX check was done by post-processing the VMA list. These days it is trivial to just check each VMA to see if it is DAX before processing it inside __get_user_pages() and return failure if a DAX VMA is encountered with FOLL_LONGTERM. Removing the allocation of the VMA list is a significant speed up for many call sites. Add an IS_ENABLED to vma_is_fsdax so that code generation is unchanged when DAX is compiled out. Remove the dummy version of __gup_longterm_locked() as !CONFIG_CMA already makes memalloc_nocma_save(), check_and_migrate_cma_pages(), and memalloc_nocma_restore() into a NOP. Link: https://lkml.kernel.org/r/0-v1-5551df3ed12e+b8-gup_dax_speedup_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: Ira Weiny Cc: Dan Williams Cc: John Hubbard Cc: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8667d0cdc71e..1fcc2b00582b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3230,7 +3230,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma) { struct inode *inode; - if (!vma->vm_file) + if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) return false; if (!vma_is_dax(vma)) return false; -- cgit v1.2.3 From 462680946b6d982afdda3bf5f7de3c379cb8c97b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 14 Dec 2020 19:06:11 -0800 Subject: mm: remove pagevec_lookup_range_nr_tag() With the merge of commit 2e1692966034 ("ceph: have ceph_writepages_start call pagevec_lookup_range_tag"), nothing calls this anymore. Link: https://lkml.kernel.org/r/20201021193926.101474-1-jlayton@kernel.org Signed-off-by: Jeff Layton Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 081d934eda64..ad4ddc17d403 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -43,9 +43,6 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag); -unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, pgoff_t end, - xa_mark_t tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, xa_mark_t tag) { -- cgit v1.2.3 From 30e6a51dbb0594d79dc2a9543659c1d596e2f7d4 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Mon, 14 Dec 2020 19:06:14 -0800 Subject: mm/shmem.c: make shmem_mapping() inline shmem_mapping() isn't worth an out-of-line call from any callsite. So make it inline by - make shmem_aops global - export shmem_aops - inline the shmem_mapping() and replace the direct call 'shmem_aops' with shmem_mapping() in shmem.c. Link: https://lkml.kernel.org/r/20201115165207.GA265355@rlk Signed-off-by: Hui Su Acked-by: Vlastimil Babka Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a5a5d1d4d7b1..d82b6f396588 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -67,7 +67,11 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); #ifdef CONFIG_SHMEM -extern bool shmem_mapping(struct address_space *mapping); +extern const struct address_space_operations shmem_aops; +static inline bool shmem_mapping(struct address_space *mapping) +{ + return mapping->a_ops == &shmem_aops; +} #else static inline bool shmem_mapping(struct address_space *mapping) { -- cgit v1.2.3 From 1a984c4e8200e0e58bb316f14a4bebb28d32d15a Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 14 Dec 2020 19:06:24 -0800 Subject: mm: memcontrol: remove unused mod_memcg_obj_state() Since commit 991e7673859e ("mm: memcontrol: account kernel stack per node") there is no user of the mod_memcg_obj_state(). So just remove it. Also rework type of the idx parameter of the mod_objcg_state() from int to enum node_stat_item. Link: https://lkml.kernel.org/r/20201013153504.92602-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Roman Gushchin Acked-by: David Rientjes Reviewed-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Christopher Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Yafang Shao Cc: Chris Down Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 922a7f600465..28bf65560084 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -798,8 +798,6 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); -void mod_memcg_obj_state(void *p, int idx, int val); - static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) { @@ -1255,10 +1253,6 @@ static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, mod_node_page_state(page_pgdat(page), idx, val); } -static inline void mod_memcg_obj_state(void *p, int idx, int val) -{ -} - static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, -- cgit v1.2.3 From 013339df116c2ee0d796dd8bfb8f293a2030c063 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 14 Dec 2020 19:06:39 -0800 Subject: mm/rmap: always do TTU_IGNORE_ACCESS Since commit 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2"), the code to check the secondary MMU's page table access bit is broken for !(TTU_IGNORE_ACCESS) because the page is unmapped from the secondary MMU's page table before the check. More specifically for those secondary MMUs which unmap the memory in mmu_notifier_invalidate_range_start() like kvm. However memory reclaim is the only user of !(TTU_IGNORE_ACCESS) or the absence of TTU_IGNORE_ACCESS and it explicitly performs the page table access check before trying to unmap the page. So, at worst the reclaim will miss accesses in a very short window if we remove page table access check in unmapping code. There is an unintented consequence of !(TTU_IGNORE_ACCESS) for the memcg reclaim. From memcg reclaim the page_referenced() only account the accesses from the processes which are in the same memcg of the target page but the unmapping code is considering accesses from all the processes, so, decreasing the effectiveness of memcg reclaim. The simplest solution is to always assume TTU_IGNORE_ACCESS in unmapping code. Link: https://lkml.kernel.org/r/20201104231928.1494083-1-shakeelb@google.com Fixes: 369ea8242c0f ("mm/rmap: update to new mmu_notifier semantic v2") Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Hugh Dickins Cc: Jerome Glisse Cc: Vlastimil Babka Cc: Michal Hocko Cc: Andrea Arcangeli Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 3a6adfa70fb0..70085ca1a3fc 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -91,7 +91,6 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ - TTU_IGNORE_ACCESS = 0x10, /* don't age */ TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will -- cgit v1.2.3 From a7cb874bfff785d39de6cc847673539cb3540821 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:45 -0800 Subject: mm: memcg: fix obsolete code comments This patch fixes/removes some obsolete comments in the code related to the kernel memory accounting: - kmem_cache->memcg_params.memcg_caches has been removed by commit 9855609bde03 ("mm: memcg/slab: use a single set of kmem_caches for all accounted allocations") - memcg->kmemcg_id is not used as a gate for kmem accounting since commit 0b8f73e10428 ("mm: memcontrol: clean up alloc, online, offline, free functions") Link: https://lkml.kernel.org/r/20201110184615.311974-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 28bf65560084..1f0f64fa3ccd 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -296,7 +296,6 @@ struct mem_cgroup { int tcpmem_pressure; #ifdef CONFIG_MEMCG_KMEM - /* Index in the kmem_cache->memcg_params.memcg_caches array */ int kmemcg_id; enum memcg_kmem_state kmem_state; struct obj_cgroup __rcu *objcg; @@ -1562,9 +1561,8 @@ static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg, } /* - * helper for accessing a memcg's index. It will be used as an index in the - * child cache array in kmem_cache, and also to derive its name. This function - * will return -1 when this is not a kmem-limited memcg. + * A helper for accessing memcg's kmem_id, used for getting + * corresponding LRU lists. */ static inline int memcg_cache_id(struct mem_cgroup *memcg) { -- cgit v1.2.3 From bef8620cd8e0a117c1a0719604052e424eb418f9 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:49 -0800 Subject: mm: memcg: deprecate the non-hierarchical mode Patch series "mm: memcg: deprecate cgroup v1 non-hierarchical mode", v1. The non-hierarchical cgroup v1 mode is a legacy of early days of the memory controller and doesn't bring any value today. However, it complicates the code and creates many edge cases all over the memory controller code. It's a good time to deprecate it completely. This patchset removes the internal logic, adjusts the user interface and updates the documentation. The alt patch removes some bits of the cgroup core code, which become obsolete. Michal Hocko said: "All that we know today is that we have a warning in place to complain loudly when somebody relies on use_hierarchy=0 with a deeper hierarchy. For all those years we have seen _zero_ reports that would describe a sensible usecase. Moreover we (SUSE) have backported this warning into old distribution kernels (since 3.0 based kernels) to extend the coverage and didn't hear even for users who adopt new kernels only very slowly. The only report we have seen so far was a LTP test suite which doesn't really reflect any real life usecase" This patch (of 3): The non-hierarchical cgroup v1 mode is a legacy of early days of the memory controller and doesn't bring any value today. However, it complicates the code and creates many edge cases all over the memory controller code. It's a good time to deprecate it completely. Functionally this patch enabled is by default for all cgroups and forbids switching it off. Nothing changes if cgroup v2 is used: hierarchical mode was enforced from scratch. To protect the ABI memory.use_hierarchy interface is preserved with a limited functionality: reading always returns "1", writing of "1" passes silently, writing of any other value fails with -EINVAL and a warning to dmesg (on the first occasion). Link: https://lkml.kernel.org/r/20201110220800.929549-1-guro@fb.com Link: https://lkml.kernel.org/r/20201110220800.929549-2-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Acked-by: David Rientjes Acked-by: Johannes Weiner Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f0f64fa3ccd..dd992b81bcb7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -234,11 +234,6 @@ struct mem_cgroup { /* vmpressure notifications */ struct vmpressure vmpressure; - /* - * Should the accounting and control be hierarchical, per subtree? - */ - bool use_hierarchy; - /* * Should the OOM killer kill all belonging tasks, had it kill one? */ @@ -588,8 +583,6 @@ static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, { if (root == memcg) return true; - if (!root->use_hierarchy) - return false; return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup); } -- cgit v1.2.3 From 9d9d341df4d519d96e7927941d91f5785c5cea07 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 14 Dec 2020 19:06:55 -0800 Subject: cgroup: remove obsoleted broken_hierarchy and warned_broken_hierarchy With the deprecation of the non-hierarchical mode of the memory controller there are no more examples of broken hierarchies left. Let's remove the cgroup core code which was supposed to print warnings about creating of broken hierarchies. Link: https://lkml.kernel.org/r/20201110220800.929549-4-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: David Rientjes Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup-defs.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index fee0b5547cd0..559ee05f86b2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -668,21 +668,6 @@ struct cgroup_subsys { */ bool threaded:1; - /* - * If %false, this subsystem is properly hierarchical - - * configuration, resource accounting and restriction on a parent - * cgroup cover those of its children. If %true, hierarchy support - * is broken in some ways - some subsystems ignore hierarchy - * completely while others are only implemented half-way. - * - * It's now disallowed to create nested cgroups if the subsystem is - * broken and cgroup core will emit a warning message on such - * cases. Eventually, all subsystems will be made properly - * hierarchical and this will go away. - */ - bool broken_hierarchy:1; - bool warned_broken_hierarchy:1; - /* the following two fields are initialized automtically during boot */ int id; const char *name; -- cgit v1.2.3 From da3ceeff923e3bc750a8423c840462760c463926 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 14 Dec 2020 19:07:04 -0800 Subject: mm: memcg/slab: rename *_lruvec_slab_state to *_lruvec_kmem_state The *_lruvec_slab_state is also suitable for pages allocated from buddy, not just for the slab objects. But the function name seems to tell us that only slab object is applicable. So we can rename the keyword of slab to kmem. Link: https://lkml.kernel.org/r/20201117085249.24319-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Roman Gushchin Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index dd992b81bcb7..71c961452d9a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -788,15 +788,15 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); -void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); +void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); -static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, +static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); - __mod_lruvec_slab_state(p, idx, val); + __mod_lruvec_kmem_state(p, idx, val); local_irq_restore(flags); } @@ -1229,7 +1229,7 @@ static inline void mod_lruvec_page_state(struct page *page, mod_node_page_state(page_pgdat(page), idx, val); } -static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, +static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { struct page *page = virt_to_head_page(p); @@ -1237,7 +1237,7 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, __mod_node_page_state(page_pgdat(page), idx, val); } -static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx, +static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { struct page *page = virt_to_head_page(p); @@ -1332,14 +1332,14 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } -static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx) +static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { - __mod_lruvec_slab_state(p, idx, 1); + __mod_lruvec_kmem_state(p, idx, 1); } -static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx) +static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) { - __mod_lruvec_slab_state(p, idx, -1); + __mod_lruvec_kmem_state(p, idx, -1); } /* idx can be of type enum memcg_stat_item or node_stat_item */ -- cgit v1.2.3 From c47d5032ed3002311a4188eae51f4641ec436beb Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 14 Dec 2020 19:07:14 -0800 Subject: mm: move lruvec stats update functions to vmstat.h Patch series "memcg: add pagetable comsumption to memory.stat", v2. Many workloads consumes significant amount of memory in pagetables. One specific use-case is the user space network driver which mmaps the application memory to provide zero copy transfer. This driver can consume a large amount memory in page tables. This patch series exposes the pagetable comsumption for each memory cgroup. This patch (of 2): This does not change any functionality and only move the functions which update the lruvec stats to vmstat.h from memcontrol.h. The main reason for this patch is to be able to use these functions in the page table contructor function which is defined in mm.h and we can not include the memcontrol.h in that file. Also this is a better place for this interface in general. The lruvec abstraction, while invented for memcg, isn't specific to memcg at all. Link: https://lkml.kernel.org/r/20201130212541.2781790-2-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 111 --------------------------------------------- include/linux/vmstat.h | 104 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 111 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 71c961452d9a..f530d634f055 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -786,8 +786,6 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); -void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, - int val); void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, @@ -810,43 +808,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } -static inline void mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_state(lruvec, idx, val); - local_irq_restore(flags); -} - -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - struct page *head = compound_head(page); /* rmap on tail pages */ - pg_data_t *pgdat = page_pgdat(page); - struct lruvec *lruvec; - - /* Untracked pages have no memcg, no lruvec. Update only the node */ - if (!head->mem_cgroup) { - __mod_node_page_state(pgdat, idx, val); - return; - } - - lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat); - __mod_lruvec_state(lruvec, idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_page_state(page, idx, val); - local_irq_restore(flags); -} - unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); @@ -1205,30 +1166,6 @@ static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, { } -static inline void __mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); -} - -static inline void mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - mod_node_page_state(lruvec_pgdat(lruvec), idx, val); -} - -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(page_pgdat(page), idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - mod_node_page_state(page_pgdat(page), idx, val); -} - static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { @@ -1308,30 +1245,6 @@ static inline void __dec_memcg_page_state(struct page *page, __mod_memcg_page_state(page, idx, -1); } -static inline void __inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - __mod_lruvec_state(lruvec, idx, 1); -} - -static inline void __dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - __mod_lruvec_state(lruvec, idx, -1); -} - -static inline void __inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - __mod_lruvec_page_state(page, idx, 1); -} - -static inline void __dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - __mod_lruvec_page_state(page, idx, -1); -} - static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); @@ -1370,30 +1283,6 @@ static inline void dec_memcg_page_state(struct page *page, mod_memcg_page_state(page, idx, -1); } -static inline void inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, 1); -} - -static inline void dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, -1); -} - -static inline void inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, 1); -} - -static inline void dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, -1); -} - static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 322dcbfcc933..773135fc6e19 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -450,4 +450,108 @@ static inline const char *vm_event_name(enum vm_event_item item) } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ +#ifdef CONFIG_MEMCG + +void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + int val); + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + unsigned long flags; + + local_irq_save(flags); + __mod_lruvec_state(lruvec, idx, val); + local_irq_restore(flags); +} + +void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val); + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + unsigned long flags; + + local_irq_save(flags); + __mod_lruvec_page_state(page, idx, val); + local_irq_restore(flags); +} + +#else + +static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(page_pgdat(page), idx, val); +} + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + mod_node_page_state(page_pgdat(page), idx, val); +} + +#endif /* CONFIG_MEMCG */ + +static inline void __inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, 1); +} + +static inline void __dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, -1); +} + +static inline void __inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, 1); +} + +static inline void __dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, -1); +} + +static inline void inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, 1); +} + +static inline void dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, -1); +} + +static inline void inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, 1); +} + +static inline void dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, -1); +} + #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From f0c0c115fb81940f4dba0644ac2a8a43b39c83f3 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 14 Dec 2020 19:07:17 -0800 Subject: mm: memcontrol: account pagetables per node For many workloads, pagetable consumption is significant and it makes sense to expose it in the memory.stat for the memory cgroups. However at the moment, the pagetables are accounted per-zone. Converting them to per-node and using the right interface will correctly account for the memory cgroups as well. [akpm@linux-foundation.org: export __mod_lruvec_page_state to modules for arch/mips/kvm/] Link: https://lkml.kernel.org/r/20201130212541.2781790-3-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++---- include/linux/mmzone.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..5bbbf4aeee94 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2203,7 +2203,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page) if (!ptlock_init(page)) return false; __SetPageTable(page); - inc_zone_page_state(page, NR_PAGETABLE); + inc_lruvec_page_state(page, NR_PAGETABLE); return true; } @@ -2211,7 +2211,7 @@ static inline void pgtable_pte_page_dtor(struct page *page) { ptlock_free(page); __ClearPageTable(page); - dec_zone_page_state(page, NR_PAGETABLE); + dec_lruvec_page_state(page, NR_PAGETABLE); } #define pte_offset_map_lock(mm, pmd, address, ptlp) \ @@ -2298,7 +2298,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) if (!pmd_ptlock_init(page)) return false; __SetPageTable(page); - inc_zone_page_state(page, NR_PAGETABLE); + inc_lruvec_page_state(page, NR_PAGETABLE); return true; } @@ -2306,7 +2306,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page) { pmd_ptlock_free(page); __ClearPageTable(page); - dec_zone_page_state(page, NR_PAGETABLE); + dec_lruvec_page_state(page, NR_PAGETABLE); } /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fb3bf696c05e..cca2a4443c9c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -152,7 +152,6 @@ enum zone_stat_item { NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - NR_PAGETABLE, /* used for pagetables */ /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) @@ -207,6 +206,7 @@ enum node_stat_item { #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) NR_KERNEL_SCS_KB, /* measured in KiB */ #endif + NR_PAGETABLE, /* used for pagetables */ NR_VM_NODE_STAT_ITEMS }; -- cgit v1.2.3 From d3f5ffcacd1528736471bc78f03f06da6c4551cc Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 14 Dec 2020 19:07:45 -0800 Subject: mm: cleanup: remove unused tsk arg from __access_remote_vm Despite a comment that said that page fault accounting would be charged to whatever task_struct* was passed into __access_remote_vm(), the tsk argument was actually unused. Making page fault accounting actually use this task struct is quite a project, so there is no point in keeping the tsk argument. Delete both the comment, and the argument. [rppt@linux.ibm.com: changelog addition] Link: https://lkml.kernel.org/r/20201026074137.4147787-1-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Mike Rapoport Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5bbbf4aeee94..1d8e84bf718a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1716,8 +1716,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, unsigned int gup_flags); +extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, -- cgit v1.2.3 From 2b5067a8143e34aa3fa57a20fb8a3c40d905f942 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 14 Dec 2020 19:07:55 -0800 Subject: mm: mmap_lock: add tracepoints around lock acquisition The goal of these tracepoints is to be able to debug lock contention issues. This lock is acquired on most (all?) mmap / munmap / page fault operations, so a multi-threaded process which does a lot of these can experience significant contention. We trace just before we start acquisition, when the acquisition returns (whether it succeeded or not), and when the lock is released (or downgraded). The events are broken out by lock type (read / write). The events are also broken out by memcg path. For container-based workloads, users often think of several processes in a memcg as a single logical "task", so collecting statistics at this level is useful. The end goal is to get latency information. This isn't directly included in the trace events. Instead, users are expected to compute the time between "start locking" and "acquire returned", using e.g. synthetic events or BPF. The benefit we get from this is simpler code. Because we use tracepoint_enabled() to decide whether or not to trace, this patch has effectively no overhead unless tracepoints are enabled at runtime. If tracepoints are enabled, there is a performance impact, but how much depends on exactly what e.g. the BPF program does. [axelrasmussen@google.com: fix use-after-free race and css ref leak in tracepoints] Link: https://lkml.kernel.org/r/20201130233504.3725241-1-axelrasmussen@google.com [axelrasmussen@google.com: v3] Link: https://lkml.kernel.org/r/20201207213358.573750-1-axelrasmussen@google.com [rostedt@goodmis.org: in-depth examples of tracepoint_enabled() usage, and per-cpu-per-context buffer design] Link: https://lkml.kernel.org/r/20201105211739.568279-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Vlastimil Babka Cc: Steven Rostedt Cc: Ingo Molnar Cc: Michel Lespinasse Cc: Daniel Jordan Cc: Jann Horn Cc: Chinwen Chang Cc: Davidlohr Bueso Cc: David Rientjes Cc: Laurent Dufour Cc: Yafang Shao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmap_lock.h | 94 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 18e7eae9b5ba..0540f0156f58 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -1,11 +1,65 @@ #ifndef _LINUX_MMAP_LOCK_H #define _LINUX_MMAP_LOCK_H +#include +#include #include +#include +#include +#include #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), +DECLARE_TRACEPOINT(mmap_lock_start_locking); +DECLARE_TRACEPOINT(mmap_lock_acquire_returned); +DECLARE_TRACEPOINT(mmap_lock_released); + +#ifdef CONFIG_TRACING + +void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write); +void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, + bool success); +void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write); + +static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm, + bool write) +{ + if (tracepoint_enabled(mmap_lock_start_locking)) + __mmap_lock_do_trace_start_locking(mm, write); +} + +static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm, + bool write, bool success) +{ + if (tracepoint_enabled(mmap_lock_acquire_returned)) + __mmap_lock_do_trace_acquire_returned(mm, write, success); +} + +static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) +{ + if (tracepoint_enabled(mmap_lock_released)) + __mmap_lock_do_trace_released(mm, write); +} + +#else /* !CONFIG_TRACING */ + +static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm, + bool write) +{ +} + +static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm, + bool write, bool success) +{ +} + +static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) +{ +} + +#endif /* CONFIG_TRACING */ + static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); @@ -13,57 +67,86 @@ static inline void mmap_init_lock(struct mm_struct *mm) static inline void mmap_write_lock(struct mm_struct *mm) { + __mmap_lock_trace_start_locking(mm, true); down_write(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, true, true); } static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) { + __mmap_lock_trace_start_locking(mm, true); down_write_nested(&mm->mmap_lock, subclass); + __mmap_lock_trace_acquire_returned(mm, true, true); } static inline int mmap_write_lock_killable(struct mm_struct *mm) { - return down_write_killable(&mm->mmap_lock); + int ret; + + __mmap_lock_trace_start_locking(mm, true); + ret = down_write_killable(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, true, ret == 0); + return ret; } static inline bool mmap_write_trylock(struct mm_struct *mm) { - return down_write_trylock(&mm->mmap_lock) != 0; + bool ret; + + __mmap_lock_trace_start_locking(mm, true); + ret = down_write_trylock(&mm->mmap_lock) != 0; + __mmap_lock_trace_acquire_returned(mm, true, ret); + return ret; } static inline void mmap_write_unlock(struct mm_struct *mm) { up_write(&mm->mmap_lock); + __mmap_lock_trace_released(mm, true); } static inline void mmap_write_downgrade(struct mm_struct *mm) { downgrade_write(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, false, true); } static inline void mmap_read_lock(struct mm_struct *mm) { + __mmap_lock_trace_start_locking(mm, false); down_read(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, false, true); } static inline int mmap_read_lock_killable(struct mm_struct *mm) { - return down_read_killable(&mm->mmap_lock); + int ret; + + __mmap_lock_trace_start_locking(mm, false); + ret = down_read_killable(&mm->mmap_lock); + __mmap_lock_trace_acquire_returned(mm, false, ret == 0); + return ret; } static inline bool mmap_read_trylock(struct mm_struct *mm) { - return down_read_trylock(&mm->mmap_lock) != 0; + bool ret; + + __mmap_lock_trace_start_locking(mm, false); + ret = down_read_trylock(&mm->mmap_lock) != 0; + __mmap_lock_trace_acquire_returned(mm, false, ret); + return ret; } static inline void mmap_read_unlock(struct mm_struct *mm) { up_read(&mm->mmap_lock); + __mmap_lock_trace_released(mm, false); } static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) { - if (down_read_trylock(&mm->mmap_lock)) { + if (mmap_read_trylock(mm)) { rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); return true; } @@ -73,6 +156,7 @@ static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { up_read_non_owner(&mm->mmap_lock); + __mmap_lock_trace_released(mm, false); } static inline void mmap_assert_locked(struct mm_struct *mm) -- cgit v1.2.3 From 0966aeb404e854e3377a10fcd01be46f19055bc6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Dec 2020 19:08:02 -0800 Subject: mm: move free_unref_page to mm/internal.h Code outside mm/ should not be calling free_unref_page(). Also move free_unref_page_list(). Link: https://lkml.kernel.org/r/20201125034655.27687-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c603237e006c..6e479e9c48ce 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -580,8 +580,6 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -extern void free_unref_page(struct page *page); -extern void free_unref_page_list(struct list_head *list); struct page_frag_cache; extern void __page_frag_cache_drain(struct page *page, unsigned int count); -- cgit v1.2.3 From cd544fd1dc9293c6702fab6effa63dac1cc67e99 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 14 Dec 2020 19:08:13 -0800 Subject: mremap: don't allow MREMAP_DONTUNMAP on special_mappings and aio As kernel expect to see only one of such mappings, any further operations on the VMA-copy may be unexpected by the kernel. Maybe it's being on the safe side, but there doesn't seem to be any expected use-case for this, so restrict it now. Link: https://lkml.kernel.org/r/20201013013416.390574-4-dima@arista.com Fixes: commit e346b3813067 ("mm/mremap: add MREMAP_DONTUNMAP to mremap()") Signed-off-by: Dmitry Safonov Cc: Alexander Viro Cc: Andy Lutomirski Cc: Brian Geffon Cc: Catalin Marinas Cc: Dan Carpenter Cc: Dan Williams Cc: Dave Jiang Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Minchan Kim Cc: Ralph Campbell Cc: Russell King Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vishal Verma Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1d8e84bf718a..dacc889744b1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -558,7 +558,7 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); int (*split)(struct vm_area_struct * area, unsigned long addr); - int (*mremap)(struct vm_area_struct * area); + int (*mremap)(struct vm_area_struct *area, unsigned long flags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); -- cgit v1.2.3 From dd3b614f858d88f33e0cf8b7353e2ad937e71da3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 14 Dec 2020 19:08:17 -0800 Subject: vm_ops: rename .split() callback to .may_split() Rename the callback to reflect that it's not called *on* or *after* split, but rather some time before the splitting to check if it's possible. Link: https://lkml.kernel.org/r/20201013013416.390574-5-dima@arista.com Signed-off-by: Dmitry Safonov Cc: Alexander Viro Cc: Andy Lutomirski Cc: Brian Geffon Cc: Catalin Marinas Cc: Dan Carpenter Cc: Dan Williams Cc: Dave Jiang Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Minchan Kim Cc: Ralph Campbell Cc: Russell King Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vishal Verma Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index dacc889744b1..5d188b8611dc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -557,7 +557,8 @@ enum page_entry_size { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - int (*split)(struct vm_area_struct * area, unsigned long addr); + /* Called any time before splitting to check if it's allowed */ + int (*may_split)(struct vm_area_struct *area, unsigned long addr); int (*mremap)(struct vm_area_struct *area, unsigned long flags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, -- cgit v1.2.3 From 95d6c701f4ca7c44dc148d664f604541266a2333 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 14 Dec 2020 19:08:34 -0800 Subject: mm: extract might_alloc() debug check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracted from slab.h, which seems to have the most complete version including the correct might_sleep() check. Roll it out to slob.c. Motivated by a discussion with Paul about possibly changing call_rcu behaviour to allocate memory, but only roughly every 500th call. There are a lot fewer places in the kernel that care about whether allocating memory is allowed or not (due to deadlocks with reclaim code) than places that care whether sleeping is allowed. But debugging these also tends to be a lot harder, so nice descriptive checks could come in handy. I might have some use eventually for annotations in drivers/gpu. Note that unlike fs_reclaim_acquire/release gfpflags_allow_blocking does not consult the PF_MEMALLOC flags. But there is no flag equivalent for GFP_NOWAIT, hence this check can't go wrong due to memalloc_no*_save/restore contexts. Willy is working on a patch series which might change this: https://lore.kernel.org/linux-mm/20200625113122.7540-7-willy@infradead.org/ I think best would be if that updates gfpflags_allow_blocking(), since there's a ton of callers all over the place for that already. Link: https://lkml.kernel.org/r/20201125162532.1299794-3-daniel.vetter@ffwll.ch Signed-off-by: Daniel Vetter Acked-by: Vlastimil Babka Acked-by: Paul E. McKenney Reviewed-by: Jason Gunthorpe Cc: Randy Dunlap Cc: Paul E. McKenney Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Vlastimil Babka Cc: Mathieu Desnoyers Cc: Sebastian Andrzej Siewior Cc: Michel Lespinasse Cc: Daniel Vetter Cc: Waiman Long Cc: Thomas Gleixner Cc: Randy Dunlap Cc: Dave Chinner Cc: Qian Cai Cc: "Matthew Wilcox (Oracle)" Cc: Christian König Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Maarten Lankhorst Cc: Thomas Hellström (Intel) Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d5ece7a9a403..a11a61b5226f 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -180,6 +180,22 @@ static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif +/** + * might_alloc - Mark possible allocation sites + * @gfp_mask: gfp_t flags that would be used to allocate + * + * Similar to might_sleep() and other annotations, this can be used in functions + * that might allocate, but often don't. Compiles to nothing without + * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp allows blocking. + */ +static inline void might_alloc(gfp_t gfp_mask) +{ + fs_reclaim_acquire(gfp_mask); + fs_reclaim_release(gfp_mask); + + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); +} + /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * -- cgit v1.2.3 From 96e2db456135db0cf2476b6890f1e8b2fdcf21eb Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 14 Dec 2020 19:08:49 -0800 Subject: mm/vmalloc: rework the drain logic A current "lazy drain" model suffers from at least two issues. First one is related to the unsorted list of vmap areas, thus in order to identify the [min:max] range of areas to be drained, it requires a full list scan. What is a time consuming if the list is too long. Second one and as a next step is about merging all fragments with a free space. What is also a time consuming because it has to iterate over entire list which holds outstanding lazy areas. See below the "preemptirqsoff" tracer that illustrates a high latency. It is ~24676us. Our workloads like audio and video are effected by such long latency: tracer: preemptirqsoff preemptirqsoff latency trace v1.1.5 on 4.9.186-perf+ -------------------------------------------------------------------- latency: 24676 us, #4/4, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 P:8) ----------------- | task: crtc_commit:112-261 (uid:0 nice:0 policy:1 rt_prio:16) ----------------- => started at: __purge_vmap_area_lazy => ended at: __purge_vmap_area_lazy _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / delay cmd pid ||||| time | caller \ / ||||| \ | / crtc_com-261 1...1 1us*: _raw_spin_lock <-__purge_vmap_area_lazy [...] crtc_com-261 1...1 24675us : _raw_spin_unlock <-__purge_vmap_area_lazy crtc_com-261 1...1 24677us : trace_preempt_on <-__purge_vmap_area_lazy crtc_com-261 1...1 24683us : => free_vmap_area_noflush => remove_vm_area => __vunmap => vfree => drm_property_free_blob => drm_mode_object_unreference => drm_property_unreference_blob => __drm_atomic_helper_crtc_destroy_state => sde_crtc_destroy_state => drm_atomic_state_default_clear => drm_atomic_state_clear => drm_atomic_state_free => complete_commit => _msm_drm_commit_work_cb => kthread_worker_fn => kthread => ret_from_fork To address those two issues we can redesign a purging of the outstanding lazy areas. Instead of queuing vmap areas to the list, we replace it by the separate rb-tree. In hat case an area is located in the tree/list in ascending order. It will give us below advantages: a) Outstanding vmap areas are merged creating bigger coalesced blocks, thus it becomes less fragmented. b) It is possible to calculate a flush range [min:max] without scanning all elements. It is O(1) access time or complexity; c) The final merge of areas with the rb-tree that represents a free space is faster because of (a). As a result the lock contention is also reduced. Link: https://lkml.kernel.org/r/20201116220033.1837-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Cc: Hillf Danton Cc: Michal Hocko Cc: Matthew Wilcox Cc: Oleksiy Avramchenko Cc: Steven Rostedt Cc: Minchan Kim Cc: huang ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 938eaf9517e2..80c0181c411d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -72,16 +72,14 @@ struct vmap_area { struct list_head list; /* address sorted list */ /* - * The following three variables can be packed, because - * a vmap_area object is always one of the three states: + * The following two variables can be packed, because + * a vmap_area object can be either: * 1) in "free" tree (root is vmap_area_root) - * 2) in "busy" tree (root is free_vmap_area_root) - * 3) in purge list (head is vmap_purge_list) + * 2) or "busy" tree (root is free_vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ - struct llist_node purge_list; /* in purge list */ }; }; -- cgit v1.2.3 From 03e92a5e097d679acbd1fb4d2ae238a38158aa0b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:09:32 -0800 Subject: ia64: remove custom __early_pfn_to_nid() The ia64 implementation of __early_pfn_to_nid() essentially relies on the same data as the generic implementation. The correspondence between memory ranges and nodes is set in memblock during early memory initialization in register_active_ranges() function. The initialization of sparsemem that requires early_pfn_to_nid() happens later and it can use the memblock information like the other architectures. Link: https://lkml.kernel.org/r/20201101170454.9567-3-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Alexey Dobriyan Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Matt Turner Cc: Meelis Roos Cc: Michael Schmitz Cc: Russell King Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 --- include/linux/mmzone.h | 11 ----------- 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5d188b8611dc..48fa3e71be1a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2434,9 +2434,6 @@ static inline int early_pfn_to_nid(unsigned long pfn) #else /* please see mm/page_alloc.c */ extern int __meminit early_pfn_to_nid(unsigned long pfn); -/* there is a per-arch backend function. */ -extern int __meminit __early_pfn_to_nid(unsigned long pfn, - struct mminit_pfnnid_cache *state); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cca2a4443c9c..16fb5522a74f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1428,17 +1428,6 @@ void sparse_init(void); #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ -/* - * During memory init memblocks map pfns to nids. The search is expensive and - * this caches recent lookups. The implementation of __early_pfn_to_nid - * may treat start/end as pfns or sections. - */ -struct mminit_pfnnid_cache { - unsigned long last_start; - unsigned long last_end; - int last_nid; -}; - /* * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we * need to check pfn validity within that MAX_ORDER_NR_PAGES block. -- cgit v1.2.3 From 5e545df3292fbd3d5963c68980f1527ead2a2b3f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:09:55 -0800 Subject: arm: remove CONFIG_ARCH_HAS_HOLES_MEMORYMODEL ARM is the only architecture that defines CONFIG_ARCH_HAS_HOLES_MEMORYMODEL which in turn enables memmap_valid_within() function that is intended to verify existence of struct page associated with a pfn when there are holes in the memory map. However, the ARCH_HAS_HOLES_MEMORYMODEL also enables HAVE_ARCH_PFN_VALID and arch-specific pfn_valid() implementation that also deals with the holes in the memory map. The only two users of memmap_valid_within() call this function after a call to pfn_valid() so the memmap_valid_within() check becomes redundant. Remove CONFIG_ARCH_HAS_HOLES_MEMORYMODEL and memmap_valid_within() and rely entirely on ARM's implementation of pfn_valid() that is now enabled unconditionally. Link: https://lkml.kernel.org/r/20201101170454.9567-9-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Alexey Dobriyan Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: John Paul Adrian Glaubitz Cc: Jonathan Corbet Cc: Matt Turner Cc: Meelis Roos Cc: Michael Schmitz Cc: Russell King Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 16fb5522a74f..6e0025b5a88f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1440,37 +1440,6 @@ void sparse_init(void); #define pfn_valid_within(pfn) (1) #endif -#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL -/* - * pfn_valid() is meant to be able to tell if a given PFN has valid memmap - * associated with it or not. This means that a struct page exists for this - * pfn. The caller cannot assume the page is fully initialized in general. - * Hotplugable pages might not have been onlined yet. pfn_to_online_page() - * will ensure the struct page is fully online and initialized. Special pages - * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly. - * - * In FLATMEM, it is expected that holes always have valid memmap as long as - * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed - * that a valid section has a memmap for the entire section. - * - * However, an ARM, and maybe other embedded architectures in the future - * free memmap backing holes to save memory on the assumption the memmap is - * never used. The page_zone linkages are then broken even though pfn_valid() - * returns true. A walker of the full memmap must then do this additional - * check to ensure the memmap they are looking at is sane by making sure - * the zone and PFN linkages are still valid. This is expensive, but walkers - * of the full memmap are extremely rare. - */ -bool memmap_valid_within(unsigned long pfn, - struct page *page, struct zone *zone); -#else -static inline bool memmap_valid_within(unsigned long pfn, - struct page *page, struct zone *zone) -{ - return true; -} -#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ - #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ -- cgit v1.2.3 From 77bc7fd607dee2ffb28daff6d0dd8ae42af61ea8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:20 -0800 Subject: mm: introduce debug_pagealloc_{map,unmap}_pages() helpers Patch series "arch, mm: improve robustness of direct map manipulation", v7. During recent discussion about KVM protected memory, David raised a concern about usage of __kernel_map_pages() outside of DEBUG_PAGEALLOC scope [1]. Indeed, for architectures that define CONFIG_ARCH_HAS_SET_DIRECT_MAP it is possible that __kernel_map_pages() would fail, but since this function is void, the failure will go unnoticed. Moreover, there's lack of consistency of __kernel_map_pages() semantics across architectures as some guard this function with #ifdef DEBUG_PAGEALLOC, some refuse to update the direct map if page allocation debugging is disabled at run time and some allow modifying the direct map regardless of DEBUG_PAGEALLOC settings. This set straightens this out by restoring dependency of __kernel_map_pages() on DEBUG_PAGEALLOC and updating the call sites accordingly. Since currently the only user of __kernel_map_pages() outside DEBUG_PAGEALLOC is hibernation, it is updated to make direct map accesses there more explicit. [1] https://lore.kernel.org/lkml/2759b4bf-e1e3-d006-7d86-78a40348269d@redhat.com This patch (of 4): When CONFIG_DEBUG_PAGEALLOC is enabled, it unmaps pages from the kernel direct mapping after free_pages(). The pages than need to be mapped back before they could be used. Theese mapping operations use __kernel_map_pages() guarded with with debug_pagealloc_enabled(). The only place that calls __kernel_map_pages() without checking whether DEBUG_PAGEALLOC is enabled is the hibernation code that presumes availability of this function when ARCH_HAS_SET_DIRECT_MAP is set. Still, on arm64, __kernel_map_pages() will bail out when DEBUG_PAGEALLOC is not enabled but set_direct_map_invalid_noflush() may render some pages not present in the direct map and hibernation code won't be able to save such pages. To make page allocation debugging and hibernation interaction more robust, the dependency on DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP has to be made more explicit. Start with combining the guard condition and the call to __kernel_map_pages() into debug_pagealloc_map_pages() and debug_pagealloc_unmap_pages() functions to emphasize that __kernel_map_pages() should not be called without DEBUG_PAGEALLOC and use these new functions to map/unmap pages when page allocation debugging is enabled. Link: https://lkml.kernel.org/r/20201109192128.960-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20201109192128.960-2-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: David Hildenbrand Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: "David S. Miller" Cc: Dave Hansen Cc: David Rientjes Cc: "Edgecombe, Rick P" Cc: "H. Peter Anvin" Cc: Heiko Carstens Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Cc: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 48fa3e71be1a..0f4c34672a13 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2943,12 +2943,27 @@ kernel_map_pages(struct page *page, int numpages, int enable) { __kernel_map_pages(page, numpages, enable); } + +static inline void debug_pagealloc_map_pages(struct page *page, int numpages) +{ + if (debug_pagealloc_enabled_static()) + __kernel_map_pages(page, numpages, 1); +} + +static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) +{ + if (debug_pagealloc_enabled_static()) + __kernel_map_pages(page, numpages, 0); +} + #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ #else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} +static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} +static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ -- cgit v1.2.3 From 2abf962a8d42b32f5ffeb827826290b799c85f86 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:25 -0800 Subject: PM: hibernate: make direct map manipulations more explicit When DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP is enabled a page may be not present in the direct map and has to be explicitly mapped before it could be copied. Introduce hibernate_map_page() and hibernation_unmap_page() that will explicitly use set_direct_map_{default,invalid}_noflush() for ARCH_HAS_SET_DIRECT_MAP case and debug_pagealloc_{map,unmap}_pages() for DEBUG_PAGEALLOC case. The remapping of the pages in safe_copy_page() presumes that it only changes protection bits in an existing PTE and so it is safe to ignore return value of set_direct_map_{default,invalid}_noflush(). Still, add a pr_warn() so that future changes in set_memory APIs will not silently break hibernation. Link: https://lkml.kernel.org/r/20201109192128.960-3-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Rafael J. Wysocki Reviewed-by: David Hildenbrand Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0f4c34672a13..6b5df31387b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2934,16 +2934,6 @@ static inline bool debug_pagealloc_enabled_static(void) #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) extern void __kernel_map_pages(struct page *page, int numpages, int enable); -/* - * When called in DEBUG_PAGEALLOC context, the call should most likely be - * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static() - */ -static inline void -kernel_map_pages(struct page *page, int numpages, int enable) -{ - __kernel_map_pages(page, numpages, enable); -} - static inline void debug_pagealloc_map_pages(struct page *page, int numpages) { if (debug_pagealloc_enabled_static()) @@ -2960,8 +2950,6 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ #else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ -static inline void -kernel_map_pages(struct page *page, int numpages, int enable) {} static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} #ifdef CONFIG_HIBERNATION -- cgit v1.2.3 From 5d6ad668f31625c6aa9ed8dc3bdb29561d2b1144 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:30 -0800 Subject: arch, mm: restore dependency of __kernel_map_pages() on DEBUG_PAGEALLOC The design of DEBUG_PAGEALLOC presumes that __kernel_map_pages() must never fail. With this assumption is wouldn't be safe to allow general usage of this function. Moreover, some architectures that implement __kernel_map_pages() have this function guarded by #ifdef DEBUG_PAGEALLOC and some refuse to map/unmap pages when page allocation debugging is disabled at runtime. As all the users of __kernel_map_pages() were converted to use debug_pagealloc_map_pages() it is safe to make it available only when DEBUG_PAGEALLOC is set. Link: https://lkml.kernel.org/r/20201109192128.960-4-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Acked-by: Kirill A. Shutemov Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b5df31387b5..7a67bdf3df5e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2931,7 +2931,11 @@ static inline bool debug_pagealloc_enabled_static(void) return static_branch_unlikely(&_debug_pagealloc_enabled); } -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) +#ifdef CONFIG_DEBUG_PAGEALLOC +/* + * To support DEBUG_PAGEALLOC architecture must ensure that + * __kernel_map_pages() never fails + */ extern void __kernel_map_pages(struct page *page, int numpages, int enable); static inline void debug_pagealloc_map_pages(struct page *page, int numpages) @@ -2949,13 +2953,13 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ -#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ +#else /* CONFIG_DEBUG_PAGEALLOC */ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); -- cgit v1.2.3 From 32a0de886eb3cb7e6990da27a9cdfa50baa8be64 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 14 Dec 2020 19:10:35 -0800 Subject: arch, mm: make kernel_page_present() always available For architectures that enable ARCH_HAS_SET_MEMORY having the ability to verify that a page is mapped in the kernel direct map can be useful regardless of hibernation. Add RISC-V implementation of kernel_page_present(), update its forward declarations and stubs to be a part of set_memory API and remove ugly ifdefery in inlcude/linux/mm.h around current declarations of kernel_page_present(). Link: https://lkml.kernel.org/r/20201109192128.960-5-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: Kirill A. Shutemov Cc: Albert Ou Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Lameter Cc: Dave Hansen Cc: David Hildenbrand Cc: David Rientjes Cc: "David S. Miller" Cc: "Edgecombe, Rick P" Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Len Brown Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pavel Machek Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 ------- include/linux/set_memory.h | 5 +++++ 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7a67bdf3df5e..5ec79757eeae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2949,16 +2949,9 @@ static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) if (debug_pagealloc_enabled_static()) __kernel_map_pages(page, numpages, 0); } - -#ifdef CONFIG_HIBERNATION -extern bool kernel_page_present(struct page *page); -#endif /* CONFIG_HIBERNATION */ #else /* CONFIG_DEBUG_PAGEALLOC */ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} -#ifdef CONFIG_HIBERNATION -static inline bool kernel_page_present(struct page *page) { return true; } -#endif /* CONFIG_HIBERNATION */ #endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 860e0f843c12..fe1aa4e54680 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -23,6 +23,11 @@ static inline int set_direct_map_default_noflush(struct page *page) { return 0; } + +static inline bool kernel_page_present(struct page *page) +{ + return true; +} #endif #ifndef set_mce_nospec -- cgit v1.2.3 From 952eaf815925f106eb6b68346b3458a68bb18ec1 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:10:53 -0800 Subject: mm, page_alloc: cache pageset high and batch in struct zone All per-cpu pagesets for a zone use the same high and batch values, that are duplicated there just for performance (locality) reasons. This patch adds the same variables also to struct zone as a shared copy. This will be useful later for making possible to disable pcplists temporarily by setting high value to 0, while remembering the values for restoring them later. But we can also immediately benefit from not updating pagesets of all possible cpus in case the newly recalculated values (after sysctl change or memory online/offline) are actually unchanged from the previous ones. Link: https://lkml.kernel.org/r/20201111092812.11329-6-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6e0025b5a88f..4eee08b0062b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -470,6 +470,12 @@ struct zone { #endif struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; + /* + * the high and batch values are copied to individual pagesets for + * faster access + */ + int pageset_high; + int pageset_batch; #ifndef CONFIG_SPARSEMEM /* -- cgit v1.2.3 From 2ee08717da50160c20056f6d6b76afdf65db33ab Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 14 Dec 2020 19:11:02 -0800 Subject: include/linux/page-flags.h: remove unused __[Set|Clear]PagePrivate They are not used anymore. Link: https://lkml.kernel.org/r/20201009135914.64826-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4f6ba9379112..50cbf5e931bc 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -363,8 +363,7 @@ PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) * for its own purposes. * - PG_private and PG_private_2 cause releasepage() and co to be invoked */ -PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY) - __CLEARPAGEFLAG(Private, private, PF_ANY) +PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) -- cgit v1.2.3 From 3b12da6d1d4adff087939c071e0d74a7857439a0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Dec 2020 19:11:05 -0800 Subject: mm/page-flags: fix comment We haven't had 'dontuse' flags since 2002. Replace this obsolete warning with a hopefully more useful one. Link: https://lkml.kernel.org/r/20201027025823.3704-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 50cbf5e931bc..c1368af622c7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -86,8 +86,7 @@ */ /* - * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break - * locked- and dirty-page accounting. + * Don't use the pageflags directly. Use the PageFoo macros. * * The page flags field is split into two parts, the main flags area * which extends from the low bits upwards, and the fields area which -- cgit v1.2.3 From ebfe1b8f6ea5d83d8c1aa18ddd8ede432a7414e7 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 14 Dec 2020 19:11:58 -0800 Subject: include/linux/huge_mm.h: remove extern keyword The external function definitions don't need the "extern" keyword. Remove them so future changes don't copy the function definition style. Link: https://lkml.kernel.org/r/20201106235135.32109-1-rcampbell@nvidia.com Signed-off-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 93 ++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0365aa97f8e7..6a19f35f836b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -7,43 +7,37 @@ #include /* only for vma_is_dax() */ -extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); -extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, - struct vm_area_struct *vma); -extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); -extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pud_t *dst_pud, pud_t *src_pud, unsigned long addr, - struct vm_area_struct *vma); +vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); +int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, + struct vm_area_struct *vma); +void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); +int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pud_t *dst_pud, pud_t *src_pud, unsigned long addr, + struct vm_area_struct *vma); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); +void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); #else static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) { } #endif -extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); -extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, - unsigned long addr, - pmd_t *pmd, - unsigned int flags); -extern bool madvise_free_huge_pmd(struct mmu_gather *tlb, - struct vm_area_struct *vma, - pmd_t *pmd, unsigned long addr, unsigned long next); -extern int zap_huge_pmd(struct mmu_gather *tlb, - struct vm_area_struct *vma, - pmd_t *pmd, unsigned long addr); -extern int zap_huge_pud(struct mmu_gather *tlb, - struct vm_area_struct *vma, - pud_t *pud, unsigned long addr); -extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, - pmd_t *old_pmd, pmd_t *new_pmd); -extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, pgprot_t newprot, - unsigned long cp_flags); +vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); +struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmd, + unsigned int flags); +bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long addr, unsigned long next); +int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr); +int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, + unsigned long addr); +bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, + unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd); +int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, + pgprot_t newprot, unsigned long cp_flags); vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, pgprot_t pgprot, bool write); @@ -100,13 +94,13 @@ enum transparent_hugepage_flag { struct kobject; struct kobj_attribute; -extern ssize_t single_hugepage_flag_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count, - enum transparent_hugepage_flag flag); -extern ssize_t single_hugepage_flag_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf, - enum transparent_hugepage_flag flag); +ssize_t single_hugepage_flag_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count, + enum transparent_hugepage_flag flag); +ssize_t single_hugepage_flag_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf, + enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) @@ -179,12 +173,11 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, (transparent_hugepage_flags & \ (1< Date: Mon, 14 Dec 2020 19:12:46 -0800 Subject: mm/compaction: make defer_compaction and compaction_deferred static defer_compaction() and compaction_deferred() and compaction_restarting() in mm/compaction.c won't be used in other files, so make them static, and remove the declaration in the header file. Take the chance to fix a typo. Link: https://lkml.kernel.org/r/20201123170801.GA9625@rlk Signed-off-by: Hui Su Acked-by: Vlastimil Babka Cc: Nitin Gupta Cc: Baoquan He Cc: Mateusz Nosek Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 1de5a1151ee7..ed4070ed41ef 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -98,11 +98,8 @@ extern void reset_isolation_suitable(pg_data_t *pgdat); extern enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int highest_zoneidx); -extern void defer_compaction(struct zone *zone, int order); -extern bool compaction_deferred(struct zone *zone, int order); extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); -extern bool compaction_restarting(struct zone *zone, int order); /* Compaction has made some progress and retrying makes sense */ static inline bool compaction_made_progress(enum compact_result result) @@ -194,15 +191,6 @@ static inline enum compact_result compaction_suitable(struct zone *zone, int ord return COMPACT_SKIPPED; } -static inline void defer_compaction(struct zone *zone, int order) -{ -} - -static inline bool compaction_deferred(struct zone *zone, int order) -{ - return true; -} - static inline bool compaction_made_progress(enum compact_result result) { return false; -- cgit v1.2.3 From 0060ef3b4e6dd1410da164d48a595eadb2fb02f7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 14 Dec 2020 19:12:59 -0800 Subject: mm: support THPs in zero_user_segments We can only kmap() one subpage of a THP at a time, so loop over all relevant subpages, skipping ones which don't need to be zeroed. This is too large to inline when THPs are enabled and we actually need highmem, so put it in highmem.c. [willy@infradead.org: start1 was allowed to be less than start2] Link: https://lkml.kernel.org/r/20201124041507.28996-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Yang Shi Cc: Jan Kara Cc: Michal Hocko Cc: Zi Yan Cc: Song Liu Cc: Mel Gorman Cc: Naresh Kamboju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 14e6202ce47f..8e21fe82b3a3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -284,13 +284,22 @@ static inline void clear_highpage(struct page *page) kunmap_atomic(kaddr); } +/* + * If we pass in a base or tail page, we can zero up to PAGE_SIZE. + * If we pass in a head page, we can zero up to the size of the compound page. + */ +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +void zero_user_segments(struct page *page, unsigned start1, unsigned end1, + unsigned start2, unsigned end2); +#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ static inline void zero_user_segments(struct page *page, - unsigned start1, unsigned end1, - unsigned start2, unsigned end2) + unsigned start1, unsigned end1, + unsigned start2, unsigned end2) { void *kaddr = kmap_atomic(page); + unsigned int i; - BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE); + BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); @@ -299,8 +308,10 @@ static inline void zero_user_segments(struct page *page, memset(kaddr + start2, 0, end2 - start2); kunmap_atomic(kaddr); - flush_dcache_page(page); + for (i = 0; i < compound_nr(page); i++) + flush_dcache_page(page + i); } +#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) -- cgit v1.2.3 From 236c32eb109696590b7428957eda50cc05e22af8 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 14 Dec 2020 19:13:13 -0800 Subject: mm: migrate: clean up migrate_prep{_local} The migrate_prep{_local} never fails, so it is pointless to have return value and check the return value. Link: https://lkml.kernel.org/r/20201113205359.556831-5-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Zi Yan Cc: Jan Kara Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Song Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 0f8d1583fa8e..4594838a0f7c 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -45,8 +45,8 @@ extern struct page *alloc_migration_target(struct page *page, unsigned long priv extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); -extern int migrate_prep(void); -extern int migrate_prep_local(void); +extern void migrate_prep(void); +extern void migrate_prep_local(void); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, -- cgit v1.2.3 From 04013513cc84c401c7de9023ff3eda7863fc4add Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:30 -0800 Subject: mm, page_alloc: do not rely on the order of page_poison and init_on_alloc/free parameters Patch series "cleanup page poisoning", v3. I have identified a number of issues and opportunities for cleanup with CONFIG_PAGE_POISON and friends: - interaction with init_on_alloc and init_on_free parameters depends on the order of parameters (Patch 1) - the boot time enabling uses static key, but inefficienty (Patch 2) - sanity checking is incompatible with hibernation (Patch 3) - CONFIG_PAGE_POISONING_NO_SANITY can be removed now that we have init_on_free (Patch 4) - CONFIG_PAGE_POISONING_ZERO can be most likely removed now that we have init_on_free (Patch 5) This patch (of 5): Enabling page_poison=1 together with init_on_alloc=1 or init_on_free=1 produces a warning in dmesg that page_poison takes precedence. However, as these warnings are printed in early_param handlers for init_on_alloc/free, they are not printed if page_poison is enabled later on the command line (handlers are called in the order of their parameters), or when init_on_alloc/free is always enabled by the respective config option - before the page_poison early param handler is called, it is not considered to be enabled. This is inconsistent. We can remove the dependency on order by making the init_on_* parameters only set a boolean variable, and postponing the evaluation after all early params have been processed. Introduce a new init_mem_debugging_and_hardening() function for that, and move the related debug_pagealloc processing there as well. As a result init_mem_debugging_and_hardening() knows always accurately if init_on_* and/or page_poison options were enabled. Thus we can also optimize want_init_on_alloc() and want_init_on_free(). We don't need to check page_poisoning_enabled() there, we can instead not enable the init_on_* static keys at all, if page poisoning is enabled. This results in a simpler and more effective code. Link: https://lkml.kernel.org/r/20201113104033.22907-1-vbabka@suse.cz Link: https://lkml.kernel.org/r/20201113104033.22907-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Michal Hocko Cc: Mateusz Nosek Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5ec79757eeae..591c784277ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2872,6 +2872,7 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +extern void init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); @@ -2881,35 +2882,20 @@ static inline void kernel_poison_pages(struct page *page, int numpages, int enable) { } #endif -#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON -DECLARE_STATIC_KEY_TRUE(init_on_alloc); -#else DECLARE_STATIC_KEY_FALSE(init_on_alloc); -#endif static inline bool want_init_on_alloc(gfp_t flags) { - if (static_branch_unlikely(&init_on_alloc) && - !page_poisoning_enabled()) + if (static_branch_unlikely(&init_on_alloc)) return true; return flags & __GFP_ZERO; } -#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON -DECLARE_STATIC_KEY_TRUE(init_on_free); -#else DECLARE_STATIC_KEY_FALSE(init_on_free); -#endif static inline bool want_init_on_free(void) { - return static_branch_unlikely(&init_on_free) && - !page_poisoning_enabled(); + return static_branch_unlikely(&init_on_free); } -#ifdef CONFIG_DEBUG_PAGEALLOC -extern void init_debug_pagealloc(void); -#else -static inline void init_debug_pagealloc(void) {} -#endif extern bool _debug_pagealloc_enabled_early; DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); -- cgit v1.2.3 From 8db26a3d47354ce7271a8cab03cd65b9d3d610b9 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:34 -0800 Subject: mm, page_poison: use static key more efficiently Commit 11c9c7edae06 ("mm/page_poison.c: replace bool variable with static key") changed page_poisoning_enabled() to a static key check. However, the function is not inlined, so each check still involves a function call with overhead not eliminated when page poisoning is disabled. Analogically to how debug_pagealloc is handled, this patch converts page_poisoning_enabled() back to boolean check, and introduces page_poisoning_enabled_static() for fast paths. Both functions are inlined. The function kernel_poison_pages() is also called unconditionally and does the static key check inside. Remove it from there and put it to callers. Also split it to two functions kernel_poison_pages() and kernel_unpoison_pages() instead of the confusing bool parameter. Also optimize the check that enables page poisoning instead of debug_pagealloc for architectures without proper debug_pagealloc support. Move the check to init_mem_debugging_and_hardening() to enable a single static key instead of having two static branches in page_poisoning_enabled_static(). Link: https://lkml.kernel.org/r/20201113104033.22907-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: David Hildenbrand Cc: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 591c784277ea..026707a58159 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2874,12 +2874,37 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, extern void init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING -extern bool page_poisoning_enabled(void); -extern void kernel_poison_pages(struct page *page, int numpages, int enable); +extern void __kernel_poison_pages(struct page *page, int numpages); +extern void __kernel_unpoison_pages(struct page *page, int numpages); +extern bool _page_poisoning_enabled_early; +DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled); +static inline bool page_poisoning_enabled(void) +{ + return _page_poisoning_enabled_early; +} +/* + * For use in fast paths after init_mem_debugging() has run, or when a + * false negative result is not harmful when called too early. + */ +static inline bool page_poisoning_enabled_static(void) +{ + return static_branch_unlikely(&_page_poisoning_enabled); +} +static inline void kernel_poison_pages(struct page *page, int numpages) +{ + if (page_poisoning_enabled_static()) + __kernel_poison_pages(page, numpages); +} +static inline void kernel_unpoison_pages(struct page *page, int numpages) +{ + if (page_poisoning_enabled_static()) + __kernel_unpoison_pages(page, numpages); +} #else static inline bool page_poisoning_enabled(void) { return false; } -static inline void kernel_poison_pages(struct page *page, int numpages, - int enable) { } +static inline bool page_poisoning_enabled_static(void) { return false; } +static inline void kernel_poison_pages(struct page *page, int numpages) { } +static inline void kernel_unpoison_pages(struct page *page, int numpages) { } #endif DECLARE_STATIC_KEY_FALSE(init_on_alloc); -- cgit v1.2.3 From 03b6c9a3e8805606c0bb4ad41855fac3bf85c3b9 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:38 -0800 Subject: kernel/power: allow hibernation with page_poison sanity checking Page poisoning used to be incompatible with hibernation, as the state of poisoned pages was lost after resume, thus enabling CONFIG_HIBERNATION forces CONFIG_PAGE_POISONING_NO_SANITY. For the same reason, the poisoning with zeroes variant CONFIG_PAGE_POISONING_ZERO used to disable hibernation. The latter restriction was removed by commit 1ad1410f632d ("PM / Hibernate: allow hibernation with PAGE_POISONING_ZERO") and similarly for init_on_free by commit 18451f9f9e58 ("PM: hibernate: fix crashes with init_on_free=1") by making sure free pages are cleared after resume. We can use the same mechanism to instead poison free pages with PAGE_POISON after resume. This covers both zero and 0xAA patterns. Thus we can remove the Kconfig restriction that disables page poison sanity checking when hibernation is enabled. Link: https://lkml.kernel.org/r/20201113104033.22907-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Rafael J. Wysocki [hibernation] Reviewed-by: David Hildenbrand Cc: Mike Rapoport Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 026707a58159..3e1fe8ca9720 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2903,6 +2903,7 @@ static inline void kernel_unpoison_pages(struct page *page, int numpages) #else static inline bool page_poisoning_enabled(void) { return false; } static inline bool page_poisoning_enabled_static(void) { return false; } +static inline void __kernel_poison_pages(struct page *page, int nunmpages) { } static inline void kernel_poison_pages(struct page *page, int numpages) { } static inline void kernel_unpoison_pages(struct page *page, int numpages) { } #endif -- cgit v1.2.3 From f289041ed4cf9a3f6e8a32068fef9ffb2acc5662 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:13:45 -0800 Subject: mm, page_poison: remove CONFIG_PAGE_POISONING_ZERO CONFIG_PAGE_POISONING_ZERO uses the zero pattern instead of 0xAA. It was introduced by commit 1414c7f4f7d7 ("mm/page_poisoning.c: allow for zero poisoning"), noting that using zeroes retains the benefit of sanitizing content of freed pages, with the benefit of not having to zero them again on alloc, and the downside of making some forms of corruption (stray writes of NULLs) harder to detect than with the 0xAA pattern. Together with CONFIG_PAGE_POISONING_NO_SANITY it made possible to sanitize the contents on free without checking it back on alloc. These days we have the init_on_free() option to achieve sanitization with zeroes and to save clearing on alloc (and without checking on alloc). Arguably if someone does choose to check the poison for corruption on alloc, the savings of not clearing the page are secondary, and it makes sense to always use the 0xAA poison pattern. Thus, remove the CONFIG_PAGE_POISONING_ZERO option for being redundant. Link: https://lkml.kernel.org/r/20201113104033.22907-6-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: David Hildenbrand Cc: Mike Rapoport Cc: Rafael J. Wysocki Cc: Alexander Potapenko Cc: Kees Cook Cc: Laura Abbott Cc: Mateusz Nosek Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poison.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index dc8ae5d8db03..aff1c9250c82 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -27,11 +27,7 @@ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/page_poison.c **********/ -#ifdef CONFIG_PAGE_POISONING_ZERO -#define PAGE_POISON 0x00 -#else #define PAGE_POISON 0xaa -#endif /********** mm/page_alloc.c ************/ -- cgit v1.2.3 From 88dcb9a3fb48c67ec345f1cdbc2a26119d3cb57d Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:33:20 -0800 Subject: mm/thp: move lru_add_page_tail() to huge_memory.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "per memcg lru lock", v21. This patchset includes 3 parts: 1) some code cleanup and minimum optimization as preparation 2) use TestCleanPageLRU as page isolation's precondition 3) replace per node lru_lock with per memcg per node lru_lock Current lru_lock is one for each of node, pgdat->lru_lock, that guard for lru lists, but now we had moved the lru lists into memcg for long time. Still using per node lru_lock is clearly unscalable, pages on each of memcgs have to compete each others for a whole lru_lock. This patchset try to use per lruvec/memcg lru_lock to repleace per node lru lock to guard lru lists, make it scalable for memcgs and get performance gain. Currently lru_lock still guards both lru list and page's lru bit, that's ok. but if we want to use specific lruvec lock on the page, we need to pin down the page's lruvec/memcg during locking. Just taking lruvec lock first may be undermined by the page's memcg charge/migration. To fix this problem, we could take out the page's lru bit clear and use it as pin down action to block the memcg changes. That's the reason for new atomic func TestClearPageLRU. So now isolating a page need both actions: TestClearPageLRU and hold the lru_lock. The typical usage of this is isolate_migratepages_block() in compaction.c we have to take lru bit before lru lock, that serialized the page isolation in memcg page charge/migration which will change page's lruvec and new lru_lock in it. The above solution suggested by Johannes Weiner, and based on his new memcg charge path, then have this patchset. (Hugh Dickins tested and contributed much code from compaction fix to general code polish, thanks a lot!). Daniel Jordan's testing show 62% improvement on modified readtwice case on his 2P * 10 core * 2 HT broadwell box on v18, which has no much different with this v20. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Thanks to Hugh Dickins and Konstantin Khlebnikov, they both brought this idea 8 years ago, and others who gave comments as well: Daniel Jordan, Mel Gorman, Shakeel Butt, Matthew Wilcox, Alexander Duyck etc. Thanks for Testing support from Intel 0day and Rong Chen, Fengguang Wu, and Yun Wang. Hugh Dickins also shared his kbuild-swap case. This patch (of 19): lru_add_page_tail() is only used in huge_memory.c, defining it in other file with a CONFIG_TRANSPARENT_HUGEPAGE macro restrict just looks weird. Let's move it THP. And make it static as Hugh Dickins suggested. Link: https://lkml.kernel.org/r/1604566549-62481-1-git-send-email-alex.shi@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-2-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Reviewed-by: Kirill A. Shutemov Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Matthew Wilcox Cc: Mel Gorman Cc: Tejun Heo Cc: Konstantin Khlebnikov Cc: Daniel Jordan Cc: Shakeel Butt Cc: Joonsoo Kim Cc: Wei Yang Cc: Alexander Duyck Cc: "Chen, Rong A" Cc: Michal Hocko Cc: Vladimir Davydov Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: "Huang, Ying" Cc: Jann Horn Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 667935c0dbd4..5e1e967c225f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -338,8 +338,6 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); -extern void lru_add_page_tail(struct page *page, struct page *page_tail, - struct lruvec *lruvec, struct list_head *head); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); -- cgit v1.2.3 From d25b5bd8a8f420b15517c19c4626c0c009f72a63 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:34:16 -0800 Subject: mm/lru: introduce TestClearPageLRU() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently lru_lock still guards both lru list and page's lru bit, that's ok. but if we want to use specific lruvec lock on the page, we need to pin down the page's lruvec/memcg during locking. Just taking lruvec lock first may be undermined by the page's memcg charge/migration. To fix this problem, we will clear the lru bit out of locking and use it as pin down action to block the page isolation in memcg changing. So now a standard steps of page isolation is following: 1, get_page(); #pin the page avoid to be free 2, TestClearPageLRU(); #block other isolation like memcg change 3, spin_lock on lru_lock; #serialize lru list access 4, delete page from lru list; This patch start with the first part: TestClearPageLRU, which combines PageLRU check and ClearPageLRU into a macro func TestClearPageLRU. This function will be used as page isolation precondition to prevent other isolations some where else. Then there are may !PageLRU page on lru list, need to remove BUG() checking accordingly. There 2 rules for lru bit now: 1, the lru bit still indicate if a page on lru list, just in some temporary moment(isolating), the page may have no lru bit when it's on lru list. but the page still must be on lru list when the lru bit set. 2, have to remove lru bit before delete it from lru list. As Andrew Morton mentioned this change would dirty cacheline for a page which isn't on the LRU. But the loss would be acceptable in Rong Chen report: https://lore.kernel.org/lkml/20200304090301.GB5972@shao2-debian/ Link: https://lkml.kernel.org/r/1604566549-62481-15-git-send-email-alex.shi@linux.alibaba.com Suggested-by: Johannes Weiner Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Vladimir Davydov Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c1368af622c7..f8b4375ce1b3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -334,6 +334,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD) PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) + TESTCLEARFLAG(LRU, lru, PF_HEAD) PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) TESTCLEARFLAG(Active, active, PF_HEAD) PAGEFLAG(Workingset, workingset, PF_HEAD) -- cgit v1.2.3 From 9df41314390b81a541ca6e84c8340bad0959e4b5 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:34:20 -0800 Subject: mm/compaction: do page isolation first in compaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, compaction would get the lru_lock and then do page isolation which works fine with pgdat->lru_lock, since any page isoltion would compete for the lru_lock. If we want to change to memcg lru_lock, we have to isolate the page before getting lru_lock, thus isoltion would block page's memcg change which relay on page isoltion too. Then we could safely use per memcg lru_lock later. The new page isolation use previous introduced TestClearPageLRU() + pgdat lru locking which will be changed to memcg lru lock later. Hugh Dickins fixed following bugs in this patch's early version: Fix lots of crashes under compaction load: isolate_migratepages_block() must clean up appropriately when rejecting a page, setting PageLRU again if it had been cleared; and a put_page() after get_page_unless_zero() cannot safely be done while holding locked_lruvec - it may turn out to be the final put_page(), which will take an lruvec lock when PageLRU. And move __isolate_lru_page_prepare back after get_page_unless_zero to make trylock_page() safe: trylock_page() is not safe to use at this time: its setting PG_locked can race with the page being freed or allocated ("Bad page"), and can also erase flags being set by one of those "sole owners" of a freshly allocated page who use non-atomic __SetPageFlag(). Link: https://lkml.kernel.org/r/1604566549-62481-16-git-send-email-alex.shi@linux.alibaba.com Suggested-by: Johannes Weiner Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Matthew Wilcox Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: "Chen, Rong A" Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 5e1e967c225f..596bc2f4d9b0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -356,7 +356,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); +extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, -- cgit v1.2.3 From 6168d0da2b479ce25a4647de194045de1bdd1f1d Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 15 Dec 2020 12:34:29 -0800 Subject: mm/lru: replace pgdat lru_lock with lruvec lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves per node lru_lock into lruvec, thus bring a lru_lock for each of memcg per node. So on a large machine, each of memcg don't have to suffer from per node pgdat->lru_lock competition. They could go fast with their self lru_lock. After move memcg charge before lru inserting, page isolation could serialize page's memcg, then per memcg lruvec lock is stable and could replace per node lru lock. In isolate_migratepages_block(), compact_unlock_should_abort and lock_page_lruvec_irqsave are open coded to work with compact_control. Also add a debug func in locking which may give some clues if there are sth out of hands. Daniel Jordan's testing show 62% improvement on modified readtwice case on his 2P * 10 core * 2 HT broadwell box. https://lore.kernel.org/lkml/20200915165807.kpp7uhiw7l3loofu@ca-dmjordan1.us.oracle.com/ Hugh Dickins helped on the patch polish, thanks! [alex.shi@linux.alibaba.com: fix comment typo] Link: https://lkml.kernel.org/r/5b085715-292a-4b43-50b3-d73dc90d1de5@linux.alibaba.com [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/5a4c2b72-7ee8-2478-fc0e-85eb83aafec4@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-18-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Rong Chen Cc: Michal Hocko Cc: Vladimir Davydov Cc: Yang Shi Cc: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Daniel Jordan Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmzone.h | 3 ++- 2 files changed, 60 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f530d634f055..aa5d559853c2 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -491,6 +491,19 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); struct mem_cgroup *get_mem_cgroup_from_page(struct page *page); +struct lruvec *lock_page_lruvec(struct page *page); +struct lruvec *lock_page_lruvec_irq(struct page *page); +struct lruvec *lock_page_lruvec_irqsave(struct page *page, + unsigned long *flags); + +#ifdef CONFIG_DEBUG_VM +void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page); +#else +static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +{ +} +#endif + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ return css ? container_of(css, struct mem_cgroup, css) : NULL; @@ -996,6 +1009,31 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } +static inline struct lruvec *lock_page_lruvec(struct page *page) +{ + struct pglist_data *pgdat = page_pgdat(page); + + spin_lock(&pgdat->__lruvec.lru_lock); + return &pgdat->__lruvec; +} + +static inline struct lruvec *lock_page_lruvec_irq(struct page *page) +{ + struct pglist_data *pgdat = page_pgdat(page); + + spin_lock_irq(&pgdat->__lruvec.lru_lock); + return &pgdat->__lruvec; +} + +static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page, + unsigned long *flagsp) +{ + struct pglist_data *pgdat = page_pgdat(page); + + spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); + return &pgdat->__lruvec; +} + static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -1215,6 +1253,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } + +static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +{ +} #endif /* CONFIG_MEMCG */ /* idx can be of type enum memcg_stat_item or node_stat_item */ @@ -1296,6 +1338,22 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec)); } +static inline void unlock_page_lruvec(struct lruvec *lruvec) +{ + spin_unlock(&lruvec->lru_lock); +} + +static inline void unlock_page_lruvec_irq(struct lruvec *lruvec) +{ + spin_unlock_irq(&lruvec->lru_lock); +} + +static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, + unsigned long flags) +{ + spin_unlock_irqrestore(&lruvec->lru_lock, flags); +} + #ifdef CONFIG_CGROUP_WRITEBACK struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 98a80c01d150..9da23c019dc5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -276,6 +276,8 @@ enum lruvec_flags { struct lruvec { struct list_head lists[NR_LRU_LISTS]; + /* per lruvec lru_lock for memcg */ + spinlock_t lru_lock; /* * These track the cost of reclaiming one LRU - file or anon - * over the other. As the observed cost of reclaiming one LRU @@ -782,7 +784,6 @@ typedef struct pglist_data { /* Write-intensive fields used by page reclaim */ ZONE_PADDING(_pad1_) - spinlock_t lru_lock; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* -- cgit v1.2.3 From 2a5e4e340b0fe0f8d402196a466887db6a270b9b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Dec 2020 12:34:33 -0800 Subject: mm/lru: introduce relock_page_lruvec() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add relock_page_lruvec() to replace repeated same code, no functional change. When testing for relock we can avoid the need for RCU locking if we simply compare the page pgdat and memcg pointers versus those that the lruvec is holding. By doing this we can avoid the extra pointer walks and accesses of the memory cgroup. In addition we can avoid the checks entirely if lruvec is currently NULL. [alex.shi@linux.alibaba.com: use page_memcg()] Link: https://lkml.kernel.org/r/66d8e79d-7ec6-bfbc-1c82-bf32db3ae5b7@linux.alibaba.com Link: https://lkml.kernel.org/r/1604566549-62481-19-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alexander Duyck Signed-off-by: Alex Shi Acked-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Matthew Wilcox Cc: Mel Gorman Cc: Konstantin Khlebnikov Cc: Tejun Heo Cc: Andrea Arcangeli Cc: "Chen, Rong A" Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Jann Horn Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Vladimir Davydov Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index aa5d559853c2..ff02f831e7e1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -485,6 +485,22 @@ out: struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); +static inline bool lruvec_holds_page_lru_lock(struct page *page, + struct lruvec *lruvec) +{ + pg_data_t *pgdat = page_pgdat(page); + const struct mem_cgroup *memcg; + struct mem_cgroup_per_node *mz; + + if (mem_cgroup_disabled()) + return lruvec == &pgdat->__lruvec; + + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + memcg = page_memcg(page) ? : root_mem_cgroup; + + return lruvec->pgdat == pgdat && mz->memcg == memcg; +} + struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); @@ -984,6 +1000,14 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, return &pgdat->__lruvec; } +static inline bool lruvec_holds_page_lru_lock(struct page *page, + struct lruvec *lruvec) +{ + pg_data_t *pgdat = page_pgdat(page); + + return lruvec == &pgdat->__lruvec; +} + static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { return NULL; @@ -1354,6 +1378,34 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, spin_unlock_irqrestore(&lruvec->lru_lock, flags); } +/* Don't lock again iff page's lruvec locked */ +static inline struct lruvec *relock_page_lruvec_irq(struct page *page, + struct lruvec *locked_lruvec) +{ + if (locked_lruvec) { + if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + return locked_lruvec; + + unlock_page_lruvec_irq(locked_lruvec); + } + + return lock_page_lruvec_irq(page); +} + +/* Don't lock again iff page's lruvec locked */ +static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, + struct lruvec *locked_lruvec, unsigned long *flags) +{ + if (locked_lruvec) { + if (lruvec_holds_page_lru_lock(page, locked_lruvec)) + return locked_lruvec; + + unlock_page_lruvec_irqrestore(locked_lruvec, *flags); + } + + return lock_page_lruvec_irqsave(page, flags); +} + #ifdef CONFIG_CGROUP_WRITEBACK struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); -- cgit v1.2.3 From 15b447361794271f4d03c04d82276a841fe06328 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 15 Dec 2020 14:21:31 -0800 Subject: mm/lru: revise the comments of lru_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we changed the pgdat->lru_lock to lruvec->lru_lock, it's time to fix the incorrect comments in code. Also fixed some zone->lru_lock comment error from ancient time. etc. I struggled to understand the comment above move_pages_to_lru() (surely it never calls page_referenced()), and eventually realized that most of it had got separated from shrink_active_list(): move that comment back. Link: https://lkml.kernel.org/r/1604566549-62481-20-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Hugh Dickins Signed-off-by: Alex Shi Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Tejun Heo Cc: Andrey Ryabinin Cc: Jann Horn Cc: Mel Gorman Cc: Matthew Wilcox Cc: Alexander Duyck Cc: Andrea Arcangeli Cc: "Chen, Rong A" Cc: Daniel Jordan Cc: "Huang, Ying" Cc: Joonsoo Kim Cc: Kirill A. Shutemov Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Michal Hocko Cc: Michal Hocko Cc: Mika Penttilä Cc: Minchan Kim Cc: Shakeel Butt Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- include/linux/mmzone.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 915f4f100383..a9688cc55964 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -79,7 +79,7 @@ struct page { struct { /* Page cache and anonymous pages */ /** * @lru: Pageout list, eg. active_list protected by - * pgdat->lru_lock. Sometimes used as a generic list + * lruvec->lru_lock. Sometimes used as a generic list * by the page owner. */ struct list_head lru; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9da23c019dc5..b593316bff3d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -113,8 +113,7 @@ static inline bool free_area_empty(struct free_area *area, int migratetype) struct pglist_data; /* - * zone->lock and the zone lru_lock are two of the hottest locks in the kernel. - * So add a wild amount of padding here to ensure that they fall into separate + * Add a wild amount of padding here to ensure datas fall into separate * cachelines. There are very few zone structures in the machine, so space * consumption is not a concern here. */ -- cgit v1.2.3 From c6c75deda81344c3a95d1d1f606d5cee109e5d54 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 15 Dec 2020 20:42:39 -0800 Subject: proc: fix lookup in /proc/net subdirectories after setns(2) Commit 1fde6f21d90f ("proc: fix /proc/net/* after setns(2)") only forced revalidation of regular files under /proc/net/ However, /proc/net/ is unusual in the sense of /proc/net/foo handlers take netns pointer from parent directory which is old netns. Steps to reproduce: (void)open("/proc/net/sctp/snmp", O_RDONLY); unshare(CLONE_NEWNET); int fd = open("/proc/net/sctp/snmp", O_RDONLY); read(fd, &c, 1); Read will read wrong data from original netns. Patch forces lookup on every directory under /proc/net . Link: https://lkml.kernel.org/r/20201205160916.GA109739@localhost.localdomain Fixes: 1da4d377f943 ("proc: revalidate misc dentries") Signed-off-by: Alexey Dobriyan Reported-by: "Rantala, Tommi T. (Nokia - FI/Espoo)" Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 270cab43ca3d..000cc0533c33 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -80,6 +80,7 @@ extern void proc_flush_pid(struct pid *); extern struct proc_dir_entry *proc_symlink(const char *, struct proc_dir_entry *, const char *); +struct proc_dir_entry *_proc_mkdir(const char *, umode_t, struct proc_dir_entry *, void *, bool); extern struct proc_dir_entry *proc_mkdir(const char *, struct proc_dir_entry *); extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t, struct proc_dir_entry *, void *); @@ -162,6 +163,11 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} static inline struct proc_dir_entry *proc_create_mount_point(const char *name) { return NULL; } +static inline struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode, + struct proc_dir_entry *parent, void *data, bool force_lookup) +{ + return NULL; +} static inline struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, struct proc_dir_entry *parent, void *data) { return NULL; } static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, @@ -199,7 +205,7 @@ struct net; static inline struct proc_dir_entry *proc_net_mkdir( struct net *net, const char *name, struct proc_dir_entry *parent) { - return proc_mkdir_data(name, 0, parent, net); + return _proc_mkdir(name, 0, parent, net, true); } struct ns_common; -- cgit v1.2.3 From aa6159ab99a9ab5df835b4750b66cf132a5aa292 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 Dec 2020 20:42:48 -0800 Subject: kernel.h: split out mathematical helpers kernel.h is being used as a dump for all kinds of stuff for a long time. Here is the attempt to start cleaning it up by splitting out mathematical helpers. At the same time convert users in header and lib folder to use new header. Though for time being include new header back to kernel.h to avoid twisted indirected includes for existing users. [sfr@canb.auug.org.au: fix powerpc build] Link: https://lkml.kernel.org/r/20201029150809.13059608@canb.auug.org.au Link: https://lkml.kernel.org/r/20201028173212.41768-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: "Paul E. McKenney" Cc: Trond Myklebust Cc: Jeff Layton Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 11 ++- include/linux/dcache.h | 1 + include/linux/iommu-helper.h | 4 +- include/linux/kernel.h | 173 +---------------------------------------- include/linux/math.h | 177 ++++++++++++++++++++++++++++++++++++++++++ include/linux/rcu_node_tree.h | 2 + include/linux/units.h | 2 +- 7 files changed, 194 insertions(+), 176 deletions(-) create mode 100644 include/linux/math.h (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5b74bdf159d6..a61f192c096b 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -1,9 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H + #include #include +#include + /* Set bits in the first 'n' bytes when loaded from memory */ #ifdef __LITTLE_ENDIAN # define aligned_byte_mask(n) ((1UL << 8*(n))-1) @@ -12,10 +15,10 @@ #endif #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) -#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) -#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) +#define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) +#define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) +#define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) +#define BITS_TO_BYTES(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6f95c3300cbb..d7b369fc15d3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 70d01edcbf8b..74be34f3a20a 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -3,7 +3,9 @@ #define _LINUX_IOMMU_HELPER_H #include -#include +#include +#include +#include static inline unsigned long iommu_device_max_index(unsigned long size, unsigned long offset, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index dbf6018fc312..f7902d8c1048 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -2,7 +2,6 @@ #ifndef _LINUX_KERNEL_H #define _LINUX_KERNEL_H - #include #include #include @@ -11,12 +10,14 @@ #include #include #include +#include #include #include #include #include + #include -#include + #include #define STACK_MAGIC 0xdeadbeef @@ -54,125 +55,11 @@ } \ ) -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -/** - * round_up - round up to next specified power of 2 - * @x: the value to round - * @y: multiple to round up to (must be a power of 2) - * - * Rounds @x up to next multiple of @y (which must be a power of 2). - * To perform arbitrary rounding up, use roundup() below. - */ -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -/** - * round_down - round down to next specified power of 2 - * @x: the value to round - * @y: multiple to round down to (must be a power of 2) - * - * Rounds @x down to next multiple of @y (which must be a power of 2). - * To perform arbitrary rounding down, use rounddown() below. - */ -#define round_down(x, y) ((x) & ~__round_mask(x, y)) - #define typeof_member(T, m) typeof(((T*)0)->m) -#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP - -#define DIV_ROUND_DOWN_ULL(ll, d) \ - ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) - -#define DIV_ROUND_UP_ULL(ll, d) \ - DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) - -#if BITS_PER_LONG == 32 -# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) -#else -# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d) -#endif - -/** - * roundup - round up to the next specified multiple - * @x: the value to up - * @y: multiple to round up to - * - * Rounds @x up to next multiple of @y. If @y will always be a power - * of 2, consider using the faster round_up(). - */ -#define roundup(x, y) ( \ -{ \ - typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -/** - * rounddown - round down to next specified multiple - * @x: the value to round - * @y: multiple to round down to - * - * Rounds @x down to next multiple of @y. If @y will always be a power - * of 2, consider using the faster round_down(). - */ -#define rounddown(x, y) ( \ -{ \ - typeof(x) __x = (x); \ - __x - (__x % (y)); \ -} \ -) - -/* - * Divide positive or negative dividend by positive or negative divisor - * and round to closest integer. Result is undefined for negative - * divisors if the dividend variable type is unsigned and for negative - * dividends if the divisor variable type is unsigned. - */ -#define DIV_ROUND_CLOSEST(x, divisor)( \ -{ \ - typeof(x) __x = x; \ - typeof(divisor) __d = divisor; \ - (((typeof(x))-1) > 0 || \ - ((typeof(divisor))-1) > 0 || \ - (((__x) > 0) == ((__d) > 0))) ? \ - (((__x) + ((__d) / 2)) / (__d)) : \ - (((__x) - ((__d) / 2)) / (__d)); \ -} \ -) -/* - * Same as above but for u64 dividends. divisor must be a 32-bit - * number. - */ -#define DIV_ROUND_CLOSEST_ULL(x, divisor)( \ -{ \ - typeof(divisor) __d = divisor; \ - unsigned long long _tmp = (x) + (__d) / 2; \ - do_div(_tmp, __d); \ - _tmp; \ -} \ -) - -/* - * Multiplies an integer by a fraction, while avoiding unnecessary - * overflow or loss of precision. - */ -#define mult_frac(x, numer, denom)( \ -{ \ - typeof(x) quot = (x) / (denom); \ - typeof(x) rem = (x) % (denom); \ - (quot * (numer)) + ((rem * (numer)) / (denom)); \ -} \ -) - - #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) -#define sector_div(a, b) do_div(a, b) - /** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing @@ -272,48 +159,6 @@ extern void __cant_migrate(const char *file, int line); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -/** - * abs - return absolute value of an argument - * @x: the value. If it is unsigned type, it is converted to signed type first. - * char is treated as if it was signed (regardless of whether it really is) - * but the macro's return type is preserved as char. - * - * Return: an absolute value of x. - */ -#define abs(x) __abs_choose_expr(x, long long, \ - __abs_choose_expr(x, long, \ - __abs_choose_expr(x, int, \ - __abs_choose_expr(x, short, \ - __abs_choose_expr(x, char, \ - __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(x), char), \ - (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ - ((void)0))))))) - -#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(x), signed type) || \ - __builtin_types_compatible_p(typeof(x), unsigned type), \ - ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) - -/** - * reciprocal_scale - "scale" a value into range [0, ep_ro) - * @val: value - * @ep_ro: right open interval endpoint - * - * Perform a "reciprocal multiplication" in order to "scale" a value into - * range [0, @ep_ro), where the upper interval endpoint is right-open. - * This is useful, e.g. for accessing a index of an array containing - * @ep_ro elements, for example. Think of it as sort of modulus, only that - * the result isn't that of modulo. ;) Note that if initial input is a - * small value, then result will return 0. - * - * Return: a result based on @val in interval [0, @ep_ro). - */ -static inline u32 reciprocal_scale(u32 val, u32 ep_ro) -{ - return (u32)(((u64) val * ep_ro) >> 32); -} - #if defined(CONFIG_MMU) && \ (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) #define might_fault() __might_fault(__FILE__, __LINE__) @@ -515,18 +360,6 @@ extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); -u64 int_pow(u64 base, unsigned int exp); -unsigned long int_sqrt(unsigned long); - -#if BITS_PER_LONG < 64 -u32 int_sqrt64(u64 x); -#else -static inline u32 int_sqrt64(u64 x) -{ - return (u32)int_sqrt(x); -} -#endif - #ifdef CONFIG_SMP extern unsigned int sysctl_oops_all_cpu_backtrace; #else diff --git a/include/linux/math.h b/include/linux/math.h new file mode 100644 index 000000000000..53674a327e39 --- /dev/null +++ b/include/linux/math.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MATH_H +#define _LINUX_MATH_H + +#include +#include + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) + +/** + * round_up - round up to next specified power of 2 + * @x: the value to round + * @y: multiple to round up to (must be a power of 2) + * + * Rounds @x up to next multiple of @y (which must be a power of 2). + * To perform arbitrary rounding up, use roundup() below. + */ +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) + +/** + * round_down - round down to next specified power of 2 + * @x: the value to round + * @y: multiple to round down to (must be a power of 2) + * + * Rounds @x down to next multiple of @y (which must be a power of 2). + * To perform arbitrary rounding down, use rounddown() below. + */ +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP + +#define DIV_ROUND_DOWN_ULL(ll, d) \ + ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_UP_ULL(ll, d) \ + DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d)) + +#if BITS_PER_LONG == 32 +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) +#else +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d) +#endif + +/** + * roundup - round up to the next specified multiple + * @x: the value to up + * @y: multiple to round up to + * + * Rounds @x up to next multiple of @y. If @y will always be a power + * of 2, consider using the faster round_up(). + */ +#define roundup(x, y) ( \ +{ \ + typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +/** + * rounddown - round down to next specified multiple + * @x: the value to round + * @y: multiple to round down to + * + * Rounds @x down to next multiple of @y. If @y will always be a power + * of 2, consider using the faster round_down(). + */ +#define rounddown(x, y) ( \ +{ \ + typeof(x) __x = (x); \ + __x - (__x % (y)); \ +} \ +) + +/* + * Divide positive or negative dividend by positive or negative divisor + * and round to closest integer. Result is undefined for negative + * divisors if the dividend variable type is unsigned and for negative + * dividends if the divisor variable type is unsigned. + */ +#define DIV_ROUND_CLOSEST(x, divisor)( \ +{ \ + typeof(x) __x = x; \ + typeof(divisor) __d = divisor; \ + (((typeof(x))-1) > 0 || \ + ((typeof(divisor))-1) > 0 || \ + (((__x) > 0) == ((__d) > 0))) ? \ + (((__x) + ((__d) / 2)) / (__d)) : \ + (((__x) - ((__d) / 2)) / (__d)); \ +} \ +) +/* + * Same as above but for u64 dividends. divisor must be a 32-bit + * number. + */ +#define DIV_ROUND_CLOSEST_ULL(x, divisor)( \ +{ \ + typeof(divisor) __d = divisor; \ + unsigned long long _tmp = (x) + (__d) / 2; \ + do_div(_tmp, __d); \ + _tmp; \ +} \ +) + +/* + * Multiplies an integer by a fraction, while avoiding unnecessary + * overflow or loss of precision. + */ +#define mult_frac(x, numer, denom)( \ +{ \ + typeof(x) quot = (x) / (denom); \ + typeof(x) rem = (x) % (denom); \ + (quot * (numer)) + ((rem * (numer)) / (denom)); \ +} \ +) + +#define sector_div(a, b) do_div(a, b) + +/** + * abs - return absolute value of an argument + * @x: the value. If it is unsigned type, it is converted to signed type first. + * char is treated as if it was signed (regardless of whether it really is) + * but the macro's return type is preserved as char. + * + * Return: an absolute value of x. + */ +#define abs(x) __abs_choose_expr(x, long long, \ + __abs_choose_expr(x, long, \ + __abs_choose_expr(x, int, \ + __abs_choose_expr(x, short, \ + __abs_choose_expr(x, char, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), char), \ + (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ + ((void)0))))))) + +#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), signed type) || \ + __builtin_types_compatible_p(typeof(x), unsigned type), \ + ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) + +/** + * reciprocal_scale - "scale" a value into range [0, ep_ro) + * @val: value + * @ep_ro: right open interval endpoint + * + * Perform a "reciprocal multiplication" in order to "scale" a value into + * range [0, @ep_ro), where the upper interval endpoint is right-open. + * This is useful, e.g. for accessing a index of an array containing + * @ep_ro elements, for example. Think of it as sort of modulus, only that + * the result isn't that of modulo. ;) Note that if initial input is a + * small value, then result will return 0. + * + * Return: a result based on @val in interval [0, @ep_ro). + */ +static inline u32 reciprocal_scale(u32 val, u32 ep_ro) +{ + return (u32)(((u64) val * ep_ro) >> 32); +} + +u64 int_pow(u64 base, unsigned int exp); +unsigned long int_sqrt(unsigned long); + +#if BITS_PER_LONG < 64 +u32 int_sqrt64(u64 x); +#else +static inline u32 int_sqrt64(u64 x) +{ + return (u32)int_sqrt(x); +} +#endif + +#endif /* _LINUX_MATH_H */ diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h index b8e094b125ee..78feb8ba7358 100644 --- a/include/linux/rcu_node_tree.h +++ b/include/linux/rcu_node_tree.h @@ -20,6 +20,8 @@ #ifndef __LINUX_RCU_NODE_TREE_H #define __LINUX_RCU_NODE_TREE_H +#include + /* * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and * CONFIG_RCU_FANOUT_LEAF. diff --git a/include/linux/units.h b/include/linux/units.h index aaf716364ec3..5c115c809507 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -2,7 +2,7 @@ #ifndef _LINUX_UNITS_H #define _LINUX_UNITS_H -#include +#include #define ABSOLUTE_ZERO_MILLICELSIUS -273150 -- cgit v1.2.3 From 0bb867795540a9223d44ddcdf478330cba5917f8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 Dec 2020 20:42:55 -0800 Subject: include/linux/bitmap.h: convert bitmap_empty() / bitmap_full() to return boolean There is no need to return int type out of boolean expression. Link: https://lkml.kernel.org/r/20201027180936.20806-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Yury Norov Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 99058eb81042..719fe036f567 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -379,7 +379,7 @@ static inline int bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline int bitmap_empty(const unsigned long *src, unsigned nbits) +static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -387,7 +387,7 @@ static inline int bitmap_empty(const unsigned long *src, unsigned nbits) return find_first_bit(src, nbits) == nbits; } -static inline int bitmap_full(const unsigned long *src, unsigned int nbits) +static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); -- cgit v1.2.3 From ab7d7798dad5aae23bb502f1a6fc0d637b07dc47 Mon Sep 17 00:00:00 2001 From: "Ma, Jianpeng" Date: Tue, 15 Dec 2020 20:42:57 -0800 Subject: bitmap: remove unused function declaration Link: https://lkml.kernel.org/r/BN7PR11MB26097166B6B46387D8A1ABA4FDE30@BN7PR11MB2609.namprd11.prod.outlook.com Fixes: 2afe27c718b6 ("lib/bitmap.c: bitmap_[empty,full]: remove code duplication") Signed-off-by: Jianpeng Ma Acked-by: Yury Norov Reviewed-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 719fe036f567..70a932470b2d 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -126,8 +126,6 @@ extern void bitmap_free(const unsigned long *bitmap); * lib/bitmap.c provides these functions: */ -extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); -extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern bool __pure __bitmap_or_equal(const unsigned long *src1, -- cgit v1.2.3 From 2f78788b55baa3410b1ec91a576286abe1ad4d6a Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 15 Dec 2020 20:43:37 -0800 Subject: ilog2: improve ilog2 for constant arguments As discussed in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97445 the const_ilog2 macro generates a lot of code which interferes badly with GCC inlining heuristics, until it can be proven that the ilog2 argument can or can't be simplified into a constant. It can be expressed using __builtin_clzll builtin which is supported by GCC 3.4 and later and when used only in the __builtin_constant_p guarded code it ought to always fold back to a constant. Other compilers support the same builtin for many years too. Other option would be to change the const_ilog2 macro, though as the description says it is meant to be used also in C constant expressions, and while GCC will fold it to constant with constant argument even in those, perhaps it is better to avoid using extensions in that case. [akpm@linux-foundation.org: coding style fixes] Link: https://lkml.kernel.org/r/20201120125154.GB3040@hirez.programming.kicks-ass.net Link: https://lkml.kernel.org/r/20201021132718.GB2176@tucnak Signed-off-by: Jakub Jelinek Signed-off-by: Peter Zijlstra (Intel) Cc: Christophe Leroy Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/log2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index c619ec6eff4a..df0b155c2141 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -156,7 +156,8 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? \ - const_ilog2(n) : \ + ((n) < 2 ? 0 : \ + 63 - __builtin_clzll(n)) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ -- cgit v1.2.3 From 6a39e62abbafd1d58d1722f40c7d26ef379c6a2f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 15 Dec 2020 20:43:44 -0800 Subject: lib: string.h: detect intra-object overflow in fortified string functions Patch series "Fortify strscpy()", v7. This patch implements a fortified version of strscpy() enabled by setting CONFIG_FORTIFY_SOURCE=y. The new version ensures the following before calling vanilla strscpy(): 1. There is no read overflow because either size is smaller than src length or we shrink size to src length by calling fortified strnlen(). 2. There is no write overflow because we either failed during compilation or at runtime by checking that size is smaller than dest size. Note that, if src and dst size cannot be got, the patch defaults to call vanilla strscpy(). The patches adds the following: 1. Implement the fortified version of strscpy(). 2. Add a new LKDTM test to ensures the fortified version still returns the same value as the vanilla one while panic'ing when there is a write overflow. 3. Correct some typos in LKDTM related file. I based my modifications on top of two patches from Daniel Axtens which modify calls to __builtin_object_size, in fortified string functions, to ensure the true size of char * are returned and not the surrounding structure size. About performance, I measured the slow down of fortified strscpy(), using the vanilla one as baseline. The hardware I used is an Intel i3 2130 CPU clocked at 3.4 GHz. I ran "Linux 5.10.0-rc4+ SMP PREEMPT" inside qemu 3.10 with 4 CPU cores. The following code, called through LKDTM, was used as a benchmark: #define TIMES 10000 char *src; char dst[7]; int i; ktime_t begin; src = kstrdup("foobar", GFP_KERNEL); if (src == NULL) return; begin = ktime_get(); for (i = 0; i < TIMES; i++) strscpy(dst, src, strlen(src)); pr_info("%d fortified strscpy() tooks %lld", TIMES, ktime_get() - begin); begin = ktime_get(); for (i = 0; i < TIMES; i++) __real_strscpy(dst, src, strlen(src)); pr_info("%d vanilla strscpy() tooks %lld", TIMES, ktime_get() - begin); kfree(src); I called the above code 30 times to compute stats for each version (in ns, round to int): | version | mean | std | median | 95th | | --------- | ------- | ------ | ------- | ------- | | fortified | 245_069 | 54_657 | 216_230 | 331_122 | | vanilla | 172_501 | 70_281 | 143_539 | 219_553 | On average, fortified strscpy() is approximately 1.42 times slower than vanilla strscpy(). For the 95th percentile, the fortified version is about 1.50 times slower. So, clearly the stats are not in favor of fortified strscpy(). But, the fortified version loops the string twice (one in strnlen() and another in vanilla strscpy()) while the vanilla one only loops once. This can explain why fortified strscpy() is slower than the vanilla one. This patch (of 5): When the fortify feature was first introduced in commit 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions"), Daniel Micay observed: * It should be possible to optionally use __builtin_object_size(x, 1) for some functions (C strings) to detect intra-object overflows (like glibc's _FORTIFY_SOURCE=2), but for now this takes the conservative approach to avoid likely compatibility issues. This is a case that often cannot be caught by KASAN. Consider: struct foo { char a[10]; char b[10]; } void test() { char *msg; struct foo foo; msg = kmalloc(16, GFP_KERNEL); strcpy(msg, "Hello world!!"); // this copy overwrites foo.b strcpy(foo.a, msg); } The questionable copy overflows foo.a and writes to foo.b as well. It cannot be detected by KASAN. Currently it is also not detected by fortify, because strcpy considers __builtin_object_size(x, 0), which considers the size of the surrounding object (here, struct foo). However, if we switch the string functions over to use __builtin_object_size(x, 1), the compiler will measure the size of the closest surrounding subobject (here, foo.a), rather than the size of the surrounding object as a whole. See https://gcc.gnu.org/onlinedocs/gcc/Object-Size-Checking.html for more info. Only do this for string functions: we cannot use it on things like memcpy, memmove, memcmp and memchr_inv due to code like this which purposefully operates on multiple structure members: (arch/x86/kernel/traps.c) /* * regs->sp points to the failing IRET frame on the * ESPFIX64 stack. Copy it to the entry stack. This fills * in gpregs->ss through gpregs->ip. * */ memmove(&gpregs->ip, (void *)regs->sp, 5*8); This change passes an allyesconfig on powerpc and x86, and an x86 kernel built with it survives running with syz-stress from syzkaller, so it seems safe so far. Link: https://lkml.kernel.org/r/20201122162451.27551-1-laniel_francis@privacyrequired.com Link: https://lkml.kernel.org/r/20201122162451.27551-2-laniel_francis@privacyrequired.com Signed-off-by: Daniel Axtens Signed-off-by: Francis Laniel Reviewed-by: Kees Cook Cc: Daniel Micay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index b1f3894a0a3e..46e91d684c47 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -292,7 +292,7 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); if (__builtin_constant_p(size) && p_size < size) __write_overflow(); if (p_size < size) @@ -302,7 +302,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) __FORTIFY_INLINE char *strcat(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); if (p_size == (size_t)-1) return __underlying_strcat(p, q); if (strlcat(p, q, p_size) >= p_size) @@ -313,7 +313,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) __FORTIFY_INLINE __kernel_size_t strlen(const char *p) { __kernel_size_t ret; - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); /* Work around gcc excess stack consumption issue */ if (p_size == (size_t)-1 || @@ -328,7 +328,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __builtin_object_size(p, 1); __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) fortify_panic(__func__); @@ -340,8 +340,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) { size_t ret; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __real_strlcpy(p, q, size); ret = strlen(q); @@ -361,8 +361,8 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) { size_t p_len, copy_len; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __underlying_strncat(p, q, count); p_len = strlen(p); @@ -475,11 +475,16 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) /* defined after fortified strlen and memcpy to reuse them */ __FORTIFY_INLINE char *strcpy(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); + size_t size; if (p_size == (size_t)-1 && q_size == (size_t)-1) return __underlying_strcpy(p, q); - memcpy(p, q, strlen(q) + 1); + size = strlen(q) + 1; + /* test here to use the more stringent object size */ + if (p_size < size) + fortify_panic(__func__); + memcpy(p, q, size); return p; } -- cgit v1.2.3 From 33e56a59e64dfb68778e5da0be13f0c47dc5d445 Mon Sep 17 00:00:00 2001 From: Francis Laniel Date: Tue, 15 Dec 2020 20:43:50 -0800 Subject: string.h: add FORTIFY coverage for strscpy() The fortified version of strscpy ensures the following before vanilla strscpy is called: 1. There is no read overflow because we either size is smaller than src length or we shrink size to src length by calling fortified strnlen. 2. There is no write overflow because we either failed during compilation or at runtime by checking that size is smaller than dest size. Link: https://lkml.kernel.org/r/20201122162451.27551-4-laniel_francis@privacyrequired.com Signed-off-by: Francis Laniel Acked-by: Kees Cook Cc: Daniel Axtens Cc: Daniel Micay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 46e91d684c47..1cd63a8a23ab 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -6,6 +6,7 @@ #include /* for inline */ #include /* for size_t */ #include /* for NULL */ +#include /* for E2BIG */ #include #include @@ -357,6 +358,53 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) return ret; } +/* defined after fortified strnlen to reuse it */ +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); +__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size) +{ + size_t len; + /* Use string size rather than possible enclosing struct size. */ + size_t p_size = __builtin_object_size(p, 1); + size_t q_size = __builtin_object_size(q, 1); + + /* If we cannot get size of p and q default to call strscpy. */ + if (p_size == (size_t) -1 && q_size == (size_t) -1) + return __real_strscpy(p, q, size); + + /* + * If size can be known at compile time and is greater than + * p_size, generate a compile time write overflow error. + */ + if (__builtin_constant_p(size) && size > p_size) + __write_overflow(); + + /* + * This call protects from read overflow, because len will default to q + * length if it smaller than size. + */ + len = strnlen(q, size); + /* + * If len equals size, we will copy only size bytes which leads to + * -E2BIG being returned. + * Otherwise we will copy len + 1 because of the final '\O'. + */ + len = len == size ? size : len + 1; + + /* + * Generate a runtime write overflow error if len is greater than + * p_size. + */ + if (len > p_size) + fortify_panic(__func__); + + /* + * We can now safely call vanilla strscpy because we are protected from: + * 1. Read overflow thanks to call to strnlen(). + * 2. Write overflow thanks to above ifs. + */ + return __real_strscpy(p, q, len); +} + /* defined after fortified strlen and strnlen to reuse them */ __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) { -- cgit v1.2.3 From 5c7b3280d221b84a675b85cb2727df7d82b65c3a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 15 Dec 2020 20:45:34 -0800 Subject: rapidio: remove unused rio_get_asm() and rio_get_device() The functions rio_get_asm() and rio_get_device() are globally exported but have almost no users in tree. The only user is rio_init_mports() which invokes it via rio_init(). rio_init() iterates over every registered device and invokes rio_fixup_device(). It looks like a fixup function which should perform a "change" to the device but does nothing. It has been like this since its introduction in commit 394b701ce4fbf ("[PATCH] RapidIO support: core base") which was merged into v2.6.15-rc1. Remove rio_init() because the performed fixup function (rio_fixup_device()) does nothing. Remove rio_get_asm() and rio_get_device() which have no callers now. Link: https://lkml.kernel.org/r/20201116170004.420143-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: Matt Porter Cc: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rio_drv.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index d637742e5039..e49c32b0f394 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -444,9 +444,6 @@ static inline void rio_set_drvdata(struct rio_dev *rdev, void *data) /* Misc driver helpers */ extern u16 rio_local_get_device_id(struct rio_mport *port); extern void rio_local_set_device_id(struct rio_mport *port, u16 did); -extern struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from); -extern struct rio_dev *rio_get_asm(u16 vid, u16 did, u16 asm_vid, u16 asm_did, - struct rio_dev *from); extern int rio_init_mports(void); #endif /* LINUX_RIO_DRV_H */ -- cgit v1.2.3 From 3d03295a7e9194c2318977b44999972ce3609664 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Dec 2020 20:45:47 -0800 Subject: relay: remove unused buf_mapped and buf_unmapped callbacks Patch series "relay: cleanup and const callbacks", v2. None of the relay users require the use of mutable structs for callbacks, however the relay code does. Instead of assigning default callbacks when there is none, add callback wrappers to conditionally call the client callbacks if available, and fall back to default behaviour (typically no-op) otherwise. This lets all relay users make their struct rchan_callbacks const data. This series starts with a number of cleanups first based on Christoph's feedback. This patch (of 9): No relay client uses the buf_mapped or buf_unmapped callbacks. Remove them. This makes relay's vm_operations_struct close callback a dummy, remove it as well. Link: https://lkml.kernel.org/r/cover.1606153547.git.jani.nikula@intel.com Link: https://lkml.kernel.org/r/c69fff6e0cd485563604240bbfcc028434983bec.1606153547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Cc: Jens Axboe Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index e13a333e7c37..b3c4f49f6951 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -101,25 +101,6 @@ struct rchan_callbacks void *prev_subbuf, size_t prev_padding); - /* - * buf_mapped - relay buffer mmap notification - * @buf: the channel buffer - * @filp: relay file pointer - * - * Called when a relay file is successfully mmapped - */ - void (*buf_mapped)(struct rchan_buf *buf, - struct file *filp); - - /* - * buf_unmapped - relay buffer unmap notification - * @buf: the channel buffer - * @filp: relay file pointer - * - * Called when a relay file is successfully unmapped - */ - void (*buf_unmapped)(struct rchan_buf *buf, - struct file *filp); /* * create_buf_file - create file to represent a relay channel buffer * @filename: the name of the file to create -- cgit v1.2.3 From 371e03880d9d34534d3eafd2a7581042be598e39 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Dec 2020 20:45:53 -0800 Subject: relay: make create_buf_file and remove_buf_file callbacks mandatory All clients provide create_buf_file and remove_buf_file callbacks, and they're required for relay to make sense. There is no point in them being optional. Also document whether each callback is mandatory/optional. Link: https://lkml.kernel.org/r/88003c1527386b93036e286e7917f1e33aec84ac.1606153547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Cc: Jens Axboe Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index b3c4f49f6951..99d024475ba5 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -89,6 +89,8 @@ struct rchan_callbacks * The client should return 1 to continue logging, 0 to stop * logging. * + * This callback is optional. + * * NOTE: subbuf_start will also be invoked when the buffer is * created, so that the first sub-buffer can be initialized * if necessary. In this case, prev_subbuf will be NULL. @@ -122,6 +124,8 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * + * This callback is mandatory. + * * See Documentation/filesystems/relay.rst for more info. */ struct dentry *(*create_buf_file)(const char *filename, @@ -139,6 +143,8 @@ struct rchan_callbacks * channel buffer. * * The callback should return 0 if successful, negative if not. + * + * This callback is mandatory. */ int (*remove_buf_file)(struct dentry *dentry); }; -- cgit v1.2.3 From 023542f48b57d6b785fcadb86ac336ae80653e58 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 15 Dec 2020 20:45:57 -0800 Subject: relay: allow the use of const callback structs None of the relay users require the use of mutable structs for callbacks, however the relay code does. Instead of assigning the default callback for subbuf_start, add a wrapper to conditionally call the client callback if available, and fall back to default behaviour otherwise. This lets all relay users make their struct rchan_callbacks const data. [jani.nikula@intel.com: cleanups, per Christoph] Link: https://lkml.kernel.org/r/20201124115412.32402-1-jani.nikula@intel.com Link: https://lkml.kernel.org/r/cc3ff292e4eb4fdc56bee3d690c7b8e39209cd37.1606153547.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Christoph Hellwig Cc: Jens Axboe Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index 99d024475ba5..72b876dd5cb8 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -62,7 +62,7 @@ struct rchan size_t subbuf_size; /* sub-buffer size */ size_t n_subbufs; /* number of sub-buffers per buffer */ size_t alloc_size; /* total buffer size allocated */ - struct rchan_callbacks *cb; /* client callbacks */ + const struct rchan_callbacks *cb; /* client callbacks */ struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ size_t last_toobig; /* tried to log event > subbuf size */ @@ -157,7 +157,7 @@ struct rchan *relay_open(const char *base_filename, struct dentry *parent, size_t subbuf_size, size_t n_subbufs, - struct rchan_callbacks *cb, + const struct rchan_callbacks *cb, void *private_data); extern int relay_late_setup_files(struct rchan *chan, const char *base_filename, -- cgit v1.2.3 From ff5c19ed4b087073cea38ff0edc80c23d7256943 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Dec 2020 20:47:23 -0800 Subject: mm: simplify follow_pte{,pmd} Merge __follow_pte_pmd, follow_pte_pmd and follow_pte into a single follow_pte function and just pass two additional NULL arguments for the two previous follow_pte callers. [sfr@canb.auug.org.au: merge fix for "s390/pci: remove races against pte updates"] Link: https://lkml.kernel.org/r/20201111221254.7f6a3658@canb.auug.org.au Link: https://lkml.kernel.org/r/20201029101432.47011-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Matthew Wilcox (Oracle) Cc: Daniel Vetter Cc: Dan Williams Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index abc7b3154298..855161080f18 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1641,9 +1641,9 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_pte_pmd(struct mm_struct *mm, unsigned long address, - struct mmu_notifier_range *range, - pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); +int follow_pte(struct mm_struct *mm, unsigned long address, + struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, + spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); int follow_phys(struct vm_area_struct *vma, unsigned long address, -- cgit v1.2.3 From c18e68696fdd9fd293f051030bce5aaff3c9b185 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Mon, 14 Dec 2020 21:15:47 -0800 Subject: net/connector: Add const qualifier to cb_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The connector driver never modifies any cb_id passed to it, so add a const qualifier to those arguments so callers can declare their struct cb_id as a constant object. Fixes build warnings like these when passing a constant struct cb_id: warning: passing argument 1 of ‘cn_add_callback’ discards ‘const’ qualifier from pointer target Signed-off-by: Geoff Levand Link: https://lore.kernel.org/r/a9e49c9e-67fa-16e7-0a6b-72f6bd30c58a@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/connector.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index cb732643471b..8ea860efea37 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -64,14 +64,14 @@ struct cn_dev { * @callback: connector's callback. * parameters are %cn_msg and the sender's credentials */ -int cn_add_callback(struct cb_id *id, const char *name, +int cn_add_callback(const struct cb_id *id, const char *name, void (*callback)(struct cn_msg *, struct netlink_skb_parms *)); /** * cn_del_callback() - Unregisters new callback with connector core. * * @id: unique connector's user identifier. */ -void cn_del_callback(struct cb_id *id); +void cn_del_callback(const struct cb_id *id); /** @@ -122,14 +122,14 @@ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 group, gfp_t gfp_mask); int cn_queue_add_callback(struct cn_queue_dev *dev, const char *name, - struct cb_id *id, + const struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *)); -void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); +void cn_queue_del_callback(struct cn_queue_dev *dev, const struct cb_id *id); void cn_queue_release_callback(struct cn_callback_entry *); struct cn_queue_dev *cn_queue_alloc_dev(const char *name, struct sock *); void cn_queue_free_dev(struct cn_queue_dev *dev); -int cn_cb_equal(struct cb_id *, struct cb_id *); +int cn_cb_equal(const struct cb_id *, const struct cb_id *); #endif /* __CONNECTOR_H */ -- cgit v1.2.3 From 7061eb8cfa902daa1ec71d23b5cddb8b4391e72b Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 14 Dec 2020 15:19:28 -0600 Subject: net: core: introduce __netdev_notify_peers There are some use cases for netdev_notify_peers in the context when rtnl lock is already held. Introduce lockless version of netdev_notify_peers call to save the extra code to call call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); After that, convert netdev_notify_peers to call the new helper. Suggested-by: Nathan Lynch Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7bf167993c05..259be67644e3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4547,6 +4547,7 @@ void __dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); +void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ -- cgit v1.2.3 From 767143a18d6d743d4254de5cf55b1bd87bb2af18 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Dec 2020 22:37:50 -0800 Subject: phy: fix kdoc warning Kdoc does not like it when multiline comment follows the networking style of starting right on the first line: include/linux/phy.h:869: warning: Function parameter or member 'config_intr' not described in 'phy_driver' Link: https://lore.kernel.org/r/20201215063750.3120976-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 381a95732b6a..9effb511acde 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -743,7 +743,8 @@ struct phy_driver { /** @read_status: Determines the negotiated speed and duplex */ int (*read_status)(struct phy_device *phydev); - /** @config_intr: Enables or disables interrupts. + /** + * @config_intr: Enables or disables interrupts. * It should also clear any pending interrupts prior to enabling the * IRQs and after disabling them. */ -- cgit v1.2.3 From 3df23a316c4a5d1764b034c71c29d67a17d5299f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 5 Dec 2020 17:19:24 +0100 Subject: pwm: Remove unused function pwmchip_add_inversed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is only defined with CONFIG_PWM unset and was introduced together with pwmchip_add_with_polarity() (which is only defined with CONFIG_PWM enabled). I guess the series that introduced pwmchip_add_with_polarity() had a different concept in earlier revisions and the !CONFIG_PWM part was just not updated accordingly. Given that there is no implementation for pwmchip_add_with_polarity() without CONFIG_PWM, just drop pwmchip_add_inversed() instead of renaming it to pwmchip_add_with_polarity(). Signed-off-by: Uwe Kleine-König Acked-by: Lee Jones Signed-off-by: Thierry Reding --- include/linux/pwm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index a13ff383fa1d..e4d84d4db293 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -473,11 +473,6 @@ static inline int pwmchip_add(struct pwm_chip *chip) return -EINVAL; } -static inline int pwmchip_add_inversed(struct pwm_chip *chip) -{ - return -EINVAL; -} - static inline int pwmchip_remove(struct pwm_chip *chip) { return -EINVAL; -- cgit v1.2.3 From 49e27134f6e9ebcd08c04a98ab7f0574b5a81a35 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 13 Dec 2020 14:06:41 +0200 Subject: net/mlx5: Fix compilation warning for 32-bit platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MLX5_GENERAL_OBJECT_TYPES types bitfield is 64-bit field. Defining an enum for such bit fields on 32-bit platform results in below warning. ./include/vdso/bits.h:7:26: warning: left shift count >= width of type [-Wshift-count-overflow] ^ ./include/linux/mlx5/mlx5_ifc.h:10716:46: note: in expansion of macro ‘BIT’ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT(0x20), ^~~ Use 32-bit friendly BIT_ULL macro. Fixes: 2a2970891647 ("net/mlx5: Add sample offload hardware bits and structures") Signed-off-by: Parav Pandit Reported-by: Stephen Rothwell Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20201213120641.216032-1-leon@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0d6e287d614f..8fbddec26eb8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10711,9 +10711,9 @@ struct mlx5_ifc_affiliated_event_header_bits { }; enum { - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT(0xc), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT(0x13), - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT(0x20), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20), }; enum { -- cgit v1.2.3 From 9bd23c31f392bda88618008f27fd52ee9e0fac38 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Fri, 20 Nov 2020 12:22:32 -0800 Subject: jbd2: add a helper to find out number of fast commit blocks Add a helper to read number of fast commit blocks from jbd2 superblock and also rename the JBD2_MIN_FC_BLKS to JBD2_DEFAULT_FAST_COMMIT_BLOCKS since this constant is just the default number of fast commit blocks to use in case number of fast commit blocks isn't set in jbd2 superblock. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201120202232.2240293-2-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d2a4860feb72..99d3cd051ac3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,7 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 -#define JBD2_MIN_FC_BLOCKS 256 +#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256 #ifdef __KERNEL__ @@ -1692,6 +1692,13 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) return journal->j_chksum_driver != NULL; } +static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) +{ + int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks); + + return num_fc_blocks ? num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS; +} + /* * Return number of free blocks in the log. Must be called under j_state_lock. */ -- cgit v1.2.3 From 476c135e321716ad7a8a5d4a19a636e2dcc50526 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Nov 2020 08:39:59 +0200 Subject: vdpa: Add missing comment for virtqueue count Add missing comment for number of virtqueue. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20201112064005.349268-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 30bc7a7223bb..0fefeb976877 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -42,6 +42,7 @@ struct vdpa_vq_state { * @config: the configuration ops for this device. * @index: device index * @features_valid: were features initialized? for legacy guests + * @nvqs: maximum number of supported virtqueues */ struct vdpa_device { struct device dev; -- cgit v1.2.3 From a4055888629bc0467d12d912cd7c90acdf3d9b12 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 18 Dec 2020 14:01:31 -0800 Subject: mm/memcg: warning on !memcg after readahead page charged Add VM_WARN_ON_ONCE_PAGE() macro. Since readahead page is charged on memcg too, in theory we don't have to check this exception now. Before safely remove them all, add a warning for the unexpected !memcg. Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Michal Hocko Acked-by: Hugh Dickins Acked-by: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2ad72d2c8cc5..5d0767cb424a 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) + #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) @@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif -- cgit v1.2.3 From bec78efd0061365a76f88e498affd7106b256823 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 18 Dec 2020 14:01:35 -0800 Subject: mm/memcg: remove unused definitions Some definitions are left unused, just clean them. Link: https://lkml.kernel.org/r/20201108003834.12669-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Reviewed-by: Roman Gushchin Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 118 --------------------------------------------- 1 file changed, 118 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 08ed57e02b73..196441f5dc99 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -913,41 +913,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, local_irq_restore(flags); } -/** - * mod_memcg_page_state - update page state statistics - * @page: the page - * @idx: page state item to account - * @val: number of pages (positive or negative) - * - * The @page must be locked or the caller must use lock_page_memcg() - * to prevent double accounting when the page is concurrently being - * moved to another memcg: - * - * lock_page(page) or lock_page_memcg(page) - * if (TestClearPageState(page)) - * mod_memcg_page_state(page, state, -1); - * unlock_page(page) or unlock_page_memcg(page) - * - * Kernel pages are an exception to this, since they'll never move. - */ -static inline void __mod_memcg_page_state(struct page *page, - int idx, int val) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - __mod_memcg_state(memcg, idx, val); -} - -static inline void mod_memcg_page_state(struct page *page, - int idx, int val) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - mod_memcg_state(memcg, idx, val); -} - static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1395,18 +1360,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, { } -static inline void __mod_memcg_page_state(struct page *page, - int idx, - int nr) -{ -} - -static inline void mod_memcg_page_state(struct page *page, - int idx, - int nr) -{ -} - static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1479,34 +1432,6 @@ static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) } #endif /* CONFIG_MEMCG */ -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __inc_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - __mod_memcg_state(memcg, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __dec_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - __mod_memcg_state(memcg, idx, -1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __inc_memcg_page_state(struct page *page, - int idx) -{ - __mod_memcg_page_state(page, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void __dec_memcg_page_state(struct page *page, - int idx) -{ - __mod_memcg_page_state(page, idx, -1); -} - static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); @@ -1517,34 +1442,6 @@ static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) __mod_lruvec_kmem_state(p, idx, -1); } -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void inc_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - mod_memcg_state(memcg, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void dec_memcg_state(struct mem_cgroup *memcg, - int idx) -{ - mod_memcg_state(memcg, idx, -1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void inc_memcg_page_state(struct page *page, - int idx) -{ - mod_memcg_page_state(page, idx, 1); -} - -/* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void dec_memcg_page_state(struct page *page, - int idx) -{ - mod_memcg_page_state(page, idx, -1); -} - static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; @@ -1733,21 +1630,6 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order) __memcg_kmem_uncharge_page(page, order); } -static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, - unsigned int nr_pages) -{ - if (memcg_kmem_enabled()) - return __memcg_kmem_charge(memcg, gfp, nr_pages); - return 0; -} - -static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg, - unsigned int nr_pages) -{ - if (memcg_kmem_enabled()) - __memcg_kmem_uncharge(memcg, nr_pages); -} - /* * A helper for accessing memcg's kmem_id, used for getting * corresponding LRU lists. -- cgit v1.2.3 From 9a1ac2288cf16f9406ca54ef221bfcf262393b15 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Fri, 18 Dec 2020 14:01:41 -0800 Subject: mm/memcontrol:rewrite mem_cgroup_page_lruvec() mem_cgroup_page_lruvec() in memcontrol.c and mem_cgroup_lruvec() in memcontrol.h is very similar except for the param(page and memcg) which also can be convert to each other. So rewrite mem_cgroup_page_lruvec() with mem_cgroup_lruvec(). [alex.shi@linux.alibaba.com: add missed warning in mem_cgroup_lruvec] Link: https://lkml.kernel.org/r/94f17bb7-ec61-5b72-3555-fabeb5a4d73b@linux.alibaba.com [lstoakes@gmail.com: warn on missing memcg on mem_cgroup_page_lruvec()] Link: https://lkml.kernel.org/r/20201125112202.387009-1-lstoakes@gmail.com Link: https://lkml.kernel.org/r/20201108143731.GA74138@rlk Signed-off-by: Hui Su Signed-off-by: Alex Shi Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Vladimir Davydov Cc: Yafang Shao Cc: Chris Down Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 196441f5dc99..d827bd7f3bfe 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -620,9 +620,10 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec + * @pgdat: pglist_data * * Returns the lru list vector holding pages for a given @memcg & - * @node combination. This can be the node lruvec, if the memory + * @pgdat combination. This can be the node lruvec, if the memory * controller is disabled. */ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, @@ -652,7 +653,21 @@ out: return lruvec; } -struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); +/** + * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page + * @page: the page + * @pgdat: pgdat of the page + * + * This function relies on page->mem_cgroup being stable. + */ +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, + struct pglist_data *pgdat) +{ + struct mem_cgroup *memcg = page_memcg(page); + + VM_WARN_ON_ONCE_PAGE(!memcg, page); + return mem_cgroup_lruvec(memcg, pgdat); +} static inline bool lruvec_holds_page_lru_lock(struct page *page, struct lruvec *lruvec) -- cgit v1.2.3 From b0a0c2615f6f199a656ed8549d7dce625d77aa77 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Dec 2020 14:05:41 -0800 Subject: epoll: wire up syscall epoll_pwait2 Split off from prev patch in the series that implements the syscall. Link: https://lkml.kernel.org/r/20201121144401.3727659-4-willemdebruijn.kernel@gmail.com Signed-off-by: Willem de Bruijn Cc: Al Viro Cc: Arnd Bergmann Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 6 ++++++ include/linux/syscalls.h | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 400c0941c8af..6e65be753603 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -537,6 +537,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_epoll_pwait2(int epfd, + struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index df0c3c74609e..f3929aff39cf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const sigset_t __user *sigmask, size_t sigsetsize); +asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long sys_dup(unsigned int fildes); -- cgit v1.2.3 From 3b1a4a8640876a966ab68ab4f561642e19674671 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:14 -0800 Subject: kasan: group vmalloc code This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Group all vmalloc-related function declarations in include/linux/kasan.h, and their implementations in mm/kasan/common.c. No functional changes. Link: https://lkml.kernel.org/r/80a6fdd29b039962843bd6cf22ce2643a7c8904e.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 30d343b4a40a..59538e795df4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -75,19 +75,6 @@ struct kasan_cache { int free_meta_offset; }; -/* - * These functions provide a special case to support backing module - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. - */ -#ifndef CONFIG_KASAN_VMALLOC -int kasan_module_alloc(void *addr, size_t size); -void kasan_free_shadow(const struct vm_struct *vm); -#else -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} -#endif - int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); @@ -156,9 +143,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} - static inline int kasan_add_zero_shadow(void *start, unsigned long size) { return 0; @@ -211,13 +195,16 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN_VMALLOC + int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_poison_vmalloc(const void *start, unsigned long size); void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -#else + +#else /* CONFIG_KASAN_VMALLOC */ + static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { @@ -232,7 +219,25 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end) {} -#endif + +#endif /* CONFIG_KASAN_VMALLOC */ + +#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) + +/* + * These functions provide a special case to support backing module + * allocations with real shadow memory. With KASAN vmalloc, the special + * case is unnecessary, as the work is handled in the generic case. + */ +int kasan_module_alloc(void *addr, size_t size); +void kasan_free_shadow(const struct vm_struct *vm); + +#else /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ + +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_free_shadow(const struct vm_struct *vm) {} + +#endif /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); -- cgit v1.2.3 From d5750edf6da759576f91ec2b57d5553985815b40 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:17 -0800 Subject: kasan: shadow declarations only for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Group shadow-related KASAN function declarations and only define them for the two existing software modes. No functional changes for software modes. Link: https://lkml.kernel.org/r/35126.1606402815@turing-police Link: https://lore.kernel.org/linux-arm-kernel/24105.1606397102@turing-police/ Link: https://lkml.kernel.org/r/e88d94eff94db883a65dca52e1736d80d28dd9bc.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Signed-off-by: Valdis Kletnieks Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon [valdis.kletnieks@vt.edu: fix build issue with asmlinkage] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 59538e795df4..7828436a3a99 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -11,7 +11,7 @@ struct task_struct; #ifdef CONFIG_KASAN -#include +#include #include /* kasan_data struct is used in KUnit tests for KASAN expected failures */ @@ -20,6 +20,20 @@ struct kunit_kasan_expectation { bool report_found; }; +#endif + +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + +#include + +/* Software KASAN implementations use shadow memory. */ + +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_SHADOW_INIT 0xFF +#else +#define KASAN_SHADOW_INIT 0 +#endif + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; @@ -35,6 +49,23 @@ static inline void *kasan_mem_to_shadow(const void *addr) + KASAN_SHADOW_OFFSET; } +int kasan_add_zero_shadow(void *start, unsigned long size); +void kasan_remove_zero_shadow(void *start, unsigned long size); + +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +static inline int kasan_add_zero_shadow(void *start, unsigned long size) +{ + return 0; +} +static inline void kasan_remove_zero_shadow(void *start, + unsigned long size) +{} + +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +#ifdef CONFIG_KASAN + /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); @@ -75,9 +106,6 @@ struct kasan_cache { int free_meta_offset; }; -int kasan_add_zero_shadow(void *start, unsigned long size); -void kasan_remove_zero_shadow(void *start, unsigned long size); - size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { @@ -143,14 +171,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline int kasan_add_zero_shadow(void *start, unsigned long size) -{ - return 0; -} -static inline void kasan_remove_zero_shadow(void *start, - unsigned long size) -{} - static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } @@ -158,8 +178,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #ifdef CONFIG_KASAN_GENERIC -#define KASAN_SHADOW_INIT 0 - void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); @@ -174,8 +192,6 @@ static inline void kasan_record_aux_stack(void *ptr) {} #ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_SHADOW_INIT 0xFF - void kasan_init_tags(void); void *kasan_reset_tag(const void *addr); -- cgit v1.2.3 From cebd0eb29acdfc2f5e44e5f356ffcd0c44f16b4a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:21 -0800 Subject: kasan: rename (un)poison_shadow to (un)poison_range This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. The new mode won't be using shadow memory. Rename external annotation kasan_unpoison_shadow() to kasan_unpoison_range(), and introduce internal functions (un)poison_range() (without kasan_ prefix). Co-developed-by: Marco Elver Link: https://lkml.kernel.org/r/fccdcaa13dc6b2211bf363d6c6d499279a54fe3a.1606161801.git.andreyknvl@google.com Signed-off-by: Marco Elver Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7828436a3a99..9740c06a04a1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -72,7 +72,7 @@ extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ extern void kasan_disable_current(void); -void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_unpoison_range(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); @@ -109,7 +109,7 @@ struct kasan_cache { size_t __ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { - kasan_unpoison_shadow(ptr, __ksize(ptr)); + kasan_unpoison_range(ptr, __ksize(ptr)); } size_t kasan_metadata_size(struct kmem_cache *cache); @@ -118,7 +118,7 @@ void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ -static inline void kasan_unpoison_shadow(const void *address, size_t size) {} +static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} -- cgit v1.2.3 From d73b49365ee65ac48074bdb5aa717bb4644dbbb7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:00:56 -0800 Subject: kasan, arm64: only use kasan_depth for software modes This is a preparatory commit for the upcoming addition of a new hardware tag-based (MTE-based) KASAN mode. Hardware tag-based KASAN won't use kasan_depth. Only define and use it when one of the software KASAN modes are enabled. No functional changes for software modes. Link: https://lkml.kernel.org/r/e16f15aeda90bc7fb4dfc2e243a14b74cc5c8219.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 18 +++++++++--------- include/linux/sched.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9740c06a04a1..b272960e8396 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,6 +52,12 @@ static inline void *kasan_mem_to_shadow(const void *addr) int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); +/* Enable reporting bugs after kasan_disable_current() */ +extern void kasan_enable_current(void); + +/* Disable reporting bugs for current task */ +extern void kasan_disable_current(void); + #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline int kasan_add_zero_shadow(void *start, unsigned long size) @@ -62,16 +68,13 @@ static inline void kasan_remove_zero_shadow(void *start, unsigned long size) {} +static inline void kasan_enable_current(void) {} +static inline void kasan_disable_current(void) {} + #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN -/* Enable reporting bugs after kasan_disable_current() */ -extern void kasan_enable_current(void); - -/* Disable reporting bugs for current task */ -extern void kasan_disable_current(void); - void kasan_unpoison_range(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); @@ -122,9 +125,6 @@ static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} -static inline void kasan_enable_current(void) {} -static inline void kasan_disable_current(void) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} diff --git a/include/linux/sched.h b/include/linux/sched.h index 51d535b69bd6..6e3a5eeec509 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1234,7 +1234,7 @@ struct task_struct { u64 timer_slack_ns; u64 default_timer_slack_ns; -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) unsigned int kasan_depth; #endif -- cgit v1.2.3 From 60a3a5fe950f4e6c02e9fc6676dc96de043ed743 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:01:03 -0800 Subject: kasan, arm64: rename kasan_init_tags and mark as __init Rename kasan_init_tags() to kasan_init_sw_tags() as the upcoming hardware tag-based KASAN mode will have its own initialization routine. Also similarly to kasan_init() mark kasan_init_tags() as __init. Link: https://lkml.kernel.org/r/71e52af72a09f4b50c8042f16101c60e50649fbb.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b272960e8396..d7042d129dc1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -192,7 +192,7 @@ static inline void kasan_record_aux_stack(void *ptr) {} #ifdef CONFIG_KASAN_SW_TAGS -void kasan_init_tags(void); +void __init kasan_init_sw_tags(void); void *kasan_reset_tag(const void *addr); @@ -201,7 +201,7 @@ bool kasan_report(unsigned long addr, size_t size, #else /* CONFIG_KASAN_SW_TAGS */ -static inline void kasan_init_tags(void) { } +static inline void kasan_init_sw_tags(void) { } static inline void *kasan_reset_tag(const void *addr) { -- cgit v1.2.3 From 0fea6e9af889f1a4e072f5de999e07fe6859fc88 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:06 -0800 Subject: kasan, arm64: expand CONFIG_KASAN checks Some #ifdef CONFIG_KASAN checks are only relevant for software KASAN modes (either related to shadow memory or compiler instrumentation). Expand those into CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS. Link: https://lkml.kernel.org/r/e6971e432dbd72bb897ff14134ebb7e169bdcf0c.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan-checks.h | 2 +- include/linux/kasan.h | 7 ++++--- include/linux/moduleloader.h | 3 ++- include/linux/string.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h index ac6aba632f2d..ca5e89fb10d3 100644 --- a/include/linux/kasan-checks.h +++ b/include/linux/kasan-checks.h @@ -9,7 +9,7 @@ * even in compilation units that selectively disable KASAN, but must use KASAN * to validate access to an address. Never use these in header files! */ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bool __kasan_check_read(const volatile void *p, unsigned int size); bool __kasan_check_write(const volatile void *p, unsigned int size); #else diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d7042d129dc1..b1381ee6922a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -238,7 +238,8 @@ static inline void kasan_release_vmalloc(unsigned long start, #endif /* CONFIG_KASAN_VMALLOC */ -#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) /* * These functions provide a special case to support backing module @@ -248,12 +249,12 @@ static inline void kasan_release_vmalloc(unsigned long start, int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); -#else /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ +#else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} -#endif /* CONFIG_KASAN && !CONFIG_KASAN_VMALLOC */ +#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ #ifdef CONFIG_KASAN_INLINE void kasan_non_canonical_hook(unsigned long addr); diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 4fa67a8b2265..9e09d11ffe5b 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -96,7 +96,8 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); -#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) #include #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else diff --git a/include/linux/string.h b/include/linux/string.h index 1cd63a8a23ab..4fcfb56abcf5 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -267,7 +267,7 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr); extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp); extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy); -- cgit v1.2.3 From 2e903b91479782b7dedd869603423d77e079d3de Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:10 -0800 Subject: kasan, arm64: implement HW_TAGS runtime Provide implementation of KASAN functions required for the hardware tag-based mode. Those include core functions for memory and pointer tagging (tags_hw.c) and bug reporting (report_tags_hw.c). Also adapt common KASAN code to support the new mode. Link: https://lkml.kernel.org/r/cfd0fbede579a6b66755c98c88c108e54f9c56bf.1606161801.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Reviewed-by: Alexander Potapenko Tested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Marco Elver Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 24 +++++++++++++++++------- include/linux/mm.h | 2 +- include/linux/page-flags-layout.h | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b1381ee6922a..d22ec4c9c1bd 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -190,25 +190,35 @@ static inline void kasan_record_aux_stack(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ -#ifdef CONFIG_KASAN_SW_TAGS - -void __init kasan_init_sw_tags(void); +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) void *kasan_reset_tag(const void *addr); bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -#else /* CONFIG_KASAN_SW_TAGS */ - -static inline void kasan_init_sw_tags(void) { } +#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline void *kasan_reset_tag(const void *addr) { return (void *)addr; } -#endif /* CONFIG_KASAN_SW_TAGS */ +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/ + +#ifdef CONFIG_KASAN_SW_TAGS +void __init kasan_init_sw_tags(void); +#else +static inline void kasan_init_sw_tags(void) { } +#endif + +#ifdef CONFIG_KASAN_HW_TAGS +void kasan_init_hw_tags_cpu(void); +void __init kasan_init_hw_tags(void); +#else +static inline void kasan_init_hw_tags_cpu(void) { } +static inline void kasan_init_hw_tags(void) { } +#endif #ifdef CONFIG_KASAN_VMALLOC diff --git a/include/linux/mm.h b/include/linux/mm.h index 362579ad0758..024ec0a00c72 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1421,7 +1421,7 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) } #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) static inline u8 page_kasan_tag(const struct page *page) { return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index e200eef6a7fd..7d4ec26d8a3e 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -77,7 +77,7 @@ #define LAST_CPUPID_SHIFT 0 #endif -#ifdef CONFIG_KASAN_SW_TAGS +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) #define KASAN_TAG_WIDTH 8 #else #define KASAN_TAG_WIDTH 0 -- cgit v1.2.3 From d56a9ef84bd0e1e8fba7a837ab12a4ec8476579f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:42 -0800 Subject: kasan, arm64: unpoison stack only with CONFIG_KASAN_STACK There's a config option CONFIG_KASAN_STACK that has to be enabled for KASAN to use stack instrumentation and perform validity checks for stack variables. There's no need to unpoison stack when CONFIG_KASAN_STACK is not enabled. Only call kasan_unpoison_task_stack[_below]() when CONFIG_KASAN_STACK is enabled. Note, that CONFIG_KASAN_STACK is an option that is currently always defined when CONFIG_KASAN is enabled, and therefore has to be tested with #if instead of #ifdef. Link: https://lkml.kernel.org/r/d09dd3f8abb388da397fd11598c5edeaa83fe559.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/If8a891e9fe01ea543e00b576852685afec0887e3 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Acked-by: Catalin Marinas Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d22ec4c9c1bd..e638255ce906 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -77,8 +77,6 @@ static inline void kasan_disable_current(void) {} void kasan_unpoison_range(const void *address, size_t size); -void kasan_unpoison_task_stack(struct task_struct *task); - void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -123,8 +121,6 @@ void kasan_restore_multi_shot(bool enabled); static inline void kasan_unpoison_range(const void *address, size_t size) {} -static inline void kasan_unpoison_task_stack(struct task_struct *task) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} @@ -176,6 +172,12 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ +#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +void kasan_unpoison_task_stack(struct task_struct *task); +#else +static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +#endif + #ifdef CONFIG_KASAN_GENERIC void kasan_cache_shrink(struct kmem_cache *cache); -- cgit v1.2.3 From c0054c565ae598073d6c27762c7d4f7de49a45d9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:52 -0800 Subject: kasan: inline kasan_reset_tag for tag-based modes Using kasan_reset_tag() currently results in a function call. As it's called quite often from the allocator code, this leads to a noticeable slowdown. Move it to include/linux/kasan.h and turn it into a static inline function. Also remove the now unneeded reset_tag() internal KASAN macro and use kasan_reset_tag() instead. Link: https://lkml.kernel.org/r/6940383a3a9dfb416134d338d8fac97a9ebb8686.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I4d2061acfe91d480a75df00b07c22d8494ef14b5 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e638255ce906..3bb72de94f90 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -194,7 +194,10 @@ static inline void kasan_record_aux_stack(void *ptr) {} #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -void *kasan_reset_tag(const void *addr); +static inline void *kasan_reset_tag(const void *addr) +{ + return (void *)arch_kasan_reset_tag(addr); +} bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -- cgit v1.2.3 From bffe690708c8b4fdb8f0bff8ff22b347fc6c709a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:02:59 -0800 Subject: kasan: open-code kasan_unpoison_slab There's the external annotation kasan_unpoison_slab() that is currently defined as static inline and uses kasan_unpoison_range(). Open-code this function in mempool.c. Otherwise with an upcoming change this function will result in an unnecessary function call. Link: https://lkml.kernel.org/r/131a6694a978a9a8b150187e539eecc8bcbf759b.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ia7c8b659f79209935cbaab3913bf7f082cc43a0e Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 3bb72de94f90..7350de3e9fe4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -107,11 +107,6 @@ struct kasan_cache { int free_meta_offset; }; -size_t __ksize(const void *); -static inline void kasan_unpoison_slab(const void *ptr) -{ - kasan_unpoison_range(ptr, __ksize(ptr)); -} size_t kasan_metadata_size(struct kmem_cache *cache); bool kasan_save_enable_multi_shot(void); @@ -167,7 +162,6 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } -static inline void kasan_unpoison_slab(const void *ptr) { } static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ -- cgit v1.2.3 From 34303244f2615add92076a4bf2d4f39323bde4f2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:10 -0800 Subject: kasan, mm: check kasan_enabled in annotations Declare the kasan_enabled static key in include/linux/kasan.h and in include/linux/mm.h and check it in all kasan annotations. This allows to avoid any slowdown caused by function calls when kasan_enabled is disabled. Link: https://lkml.kernel.org/r/9f90e3c0aa840dbb4833367c2335193299f69023.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I2589451d3c96c97abbcbf714baabe6161c6f153e Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Reviewed-by: Dmitry Vyukov Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 213 +++++++++++++++++++++++++++++++++++++++----------- include/linux/mm.h | 22 ++++-- 2 files changed, 182 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7350de3e9fe4..9176849c4934 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -2,6 +2,7 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H +#include #include struct kmem_cache; @@ -75,54 +76,176 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN -void kasan_unpoison_range(const void *address, size_t size); +struct kasan_cache { + int alloc_meta_offset; + int free_meta_offset; +}; -void kasan_alloc_pages(struct page *page, unsigned int order); -void kasan_free_pages(struct page *page, unsigned int order); +#ifdef CONFIG_KASAN_HW_TAGS +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} +#else +static inline bool kasan_enabled(void) +{ + return true; +} +#endif -void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, - slab_flags_t *flags); +void __kasan_unpoison_range(const void *addr, size_t size); +static __always_inline void kasan_unpoison_range(const void *addr, size_t size) +{ + if (kasan_enabled()) + __kasan_unpoison_range(addr, size); +} -void kasan_poison_slab(struct page *page); -void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); -void kasan_poison_object_data(struct kmem_cache *cache, void *object); -void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, - const void *object); +void __kasan_alloc_pages(struct page *page, unsigned int order); +static __always_inline void kasan_alloc_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_alloc_pages(page, order); +} -void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, - gfp_t flags); -void kasan_kfree_large(void *ptr, unsigned long ip); -void kasan_poison_kfree(void *ptr, unsigned long ip); -void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object, - size_t size, gfp_t flags); -void * __must_check kasan_krealloc(const void *object, size_t new_size, - gfp_t flags); +void __kasan_free_pages(struct page *page, unsigned int order); +static __always_inline void kasan_free_pages(struct page *page, + unsigned int order) +{ + if (kasan_enabled()) + __kasan_free_pages(page, order); +} -void * __must_check kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags); -bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); +void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, + slab_flags_t *flags); +static __always_inline void kasan_cache_create(struct kmem_cache *cache, + unsigned int *size, slab_flags_t *flags) +{ + if (kasan_enabled()) + __kasan_cache_create(cache, size, flags); +} -struct kasan_cache { - int alloc_meta_offset; - int free_meta_offset; -}; +size_t __kasan_metadata_size(struct kmem_cache *cache); +static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache) +{ + if (kasan_enabled()) + return __kasan_metadata_size(cache); + return 0; +} + +void __kasan_poison_slab(struct page *page); +static __always_inline void kasan_poison_slab(struct page *page) +{ + if (kasan_enabled()) + __kasan_poison_slab(page); +} + +void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object); +static __always_inline void kasan_unpoison_object_data(struct kmem_cache *cache, + void *object) +{ + if (kasan_enabled()) + __kasan_unpoison_object_data(cache, object); +} + +void __kasan_poison_object_data(struct kmem_cache *cache, void *object); +static __always_inline void kasan_poison_object_data(struct kmem_cache *cache, + void *object) +{ + if (kasan_enabled()) + __kasan_poison_object_data(cache, object); +} + +void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, + const void *object); +static __always_inline void * __must_check kasan_init_slab_obj( + struct kmem_cache *cache, const void *object) +{ + if (kasan_enabled()) + return __kasan_init_slab_obj(cache, object); + return (void *)object; +} + +bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); +static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) +{ + if (kasan_enabled()) + return __kasan_slab_free(s, object, ip); + return false; +} + +void * __must_check __kasan_slab_alloc(struct kmem_cache *s, + void *object, gfp_t flags); +static __always_inline void * __must_check kasan_slab_alloc( + struct kmem_cache *s, void *object, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_slab_alloc(s, object, flags); + return object; +} + +void * __must_check __kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size, gfp_t flags); +static __always_inline void * __must_check kasan_kmalloc(struct kmem_cache *s, + const void *object, size_t size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_kmalloc(s, object, size, flags); + return (void *)object; +} -size_t kasan_metadata_size(struct kmem_cache *cache); +void * __must_check __kasan_kmalloc_large(const void *ptr, + size_t size, gfp_t flags); +static __always_inline void * __must_check kasan_kmalloc_large(const void *ptr, + size_t size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_kmalloc_large(ptr, size, flags); + return (void *)ptr; +} + +void * __must_check __kasan_krealloc(const void *object, + size_t new_size, gfp_t flags); +static __always_inline void * __must_check kasan_krealloc(const void *object, + size_t new_size, gfp_t flags) +{ + if (kasan_enabled()) + return __kasan_krealloc(object, new_size, flags); + return (void *)object; +} + +void __kasan_poison_kfree(void *ptr, unsigned long ip); +static __always_inline void kasan_poison_kfree(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_poison_kfree(ptr, ip); +} + +void __kasan_kfree_large(void *ptr, unsigned long ip); +static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_kfree_large(ptr, ip); +} bool kasan_save_enable_multi_shot(void); void kasan_restore_multi_shot(bool enabled); #else /* CONFIG_KASAN */ +static inline bool kasan_enabled(void) +{ + return false; +} static inline void kasan_unpoison_range(const void *address, size_t size) {} - static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} - static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} - +static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} @@ -133,36 +256,32 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } - -static inline void *kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) { - return ptr; + return false; +} +static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags) +{ + return object; } -static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} -static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) { return (void *)object; } +static inline void *kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags) +{ + return (void *)ptr; +} static inline void *kasan_krealloc(const void *object, size_t new_size, gfp_t flags) { return (void *)object; } - -static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags) -{ - return object; -} -static inline bool kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip) -{ - return false; -} - -static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } +static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} +static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} #endif /* CONFIG_KASAN */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 024ec0a00c72..5299b90a6c40 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -31,6 +31,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1422,22 +1423,30 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) + static inline u8 page_kasan_tag(const struct page *page) { - return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + if (kasan_enabled()) + return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + return 0xff; } static inline void page_kasan_tag_set(struct page *page, u8 tag) { - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + if (kasan_enabled()) { + page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + } } static inline void page_kasan_tag_reset(struct page *page) { - page_kasan_tag_set(page, 0xff); + if (kasan_enabled()) + page_kasan_tag_set(page, 0xff); } -#else + +#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ + static inline u8 page_kasan_tag(const struct page *page) { return 0xff; @@ -1445,7 +1454,8 @@ static inline u8 page_kasan_tag(const struct page *page) static inline void page_kasan_tag_set(struct page *page, u8 tag) { } static inline void page_kasan_tag_reset(struct page *page) { } -#endif + +#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline struct zone *page_zone(const struct page *page) { -- cgit v1.2.3 From eeb3160c2419e0f1045537acac7b19cba64112f4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:13 -0800 Subject: kasan, mm: rename kasan_poison_kfree Rename kasan_poison_kfree() to kasan_slab_free_mempool() as it better reflects what this annotation does. Also add a comment that explains the PageSlab() check. No functional changes. Link: https://lkml.kernel.org/r/141675fb493555e984c5dca555e9d9f768c7bbaa.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I5026f87364e556b506ef1baee725144bb04b8810 Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9176849c4934..6f0c5d9aa43f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -176,6 +176,13 @@ static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, return false; } +void __kasan_slab_free_mempool(void *ptr, unsigned long ip); +static __always_inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) +{ + if (kasan_enabled()) + __kasan_slab_free_mempool(ptr, ip); +} + void * __must_check __kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); static __always_inline void * __must_check kasan_slab_alloc( @@ -216,13 +223,6 @@ static __always_inline void * __must_check kasan_krealloc(const void *object, return (void *)object; } -void __kasan_poison_kfree(void *ptr, unsigned long ip); -static __always_inline void kasan_poison_kfree(void *ptr, unsigned long ip) -{ - if (kasan_enabled()) - __kasan_poison_kfree(ptr, ip); -} - void __kasan_kfree_large(void *ptr, unsigned long ip); static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip) { @@ -261,6 +261,7 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object, { return false; } +static inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) { @@ -280,7 +281,6 @@ static inline void *kasan_krealloc(const void *object, size_t new_size, { return (void *)object; } -static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} #endif /* CONFIG_KASAN */ -- cgit v1.2.3 From e86f8b09f215e3755cd2d56930487dec2de02433 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 22 Dec 2020 12:03:31 -0800 Subject: kasan, mm: allow cache merging with no metadata The reason cache merging is disabled with KASAN is because KASAN puts its metadata right after the allocated object. When the merged caches have slightly different sizes, the metadata ends up in different places, which KASAN doesn't support. It might be possible to adjust the metadata allocation algorithm and make it friendly to the cache merging code. Instead this change takes a simpler approach and allows merging caches when no metadata is present. Which is the case for hardware tag-based KASAN with kasan.mode=prod. Link: https://lkml.kernel.org/r/37497e940bfd4b32c0a93a702a9ae4cf061d5392.1606162397.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/Ia114847dfb2244f297d2cb82d592bf6a07455dba Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Reviewed-by: Dmitry Vyukov Reviewed-by: Marco Elver Tested-by: Vincenzo Frascino Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Branislav Rankov Cc: Catalin Marinas Cc: Evgenii Stepanov Cc: Kevin Brodsky Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 6f0c5d9aa43f..5e0655fb2a6f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -82,17 +82,30 @@ struct kasan_cache { }; #ifdef CONFIG_KASAN_HW_TAGS + DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + static __always_inline bool kasan_enabled(void) { return static_branch_likely(&kasan_flag_enabled); } -#else + +#else /* CONFIG_KASAN_HW_TAGS */ + static inline bool kasan_enabled(void) { return true; } -#endif + +#endif /* CONFIG_KASAN_HW_TAGS */ + +slab_flags_t __kasan_never_merge(void); +static __always_inline slab_flags_t kasan_never_merge(void) +{ + if (kasan_enabled()) + return __kasan_never_merge(); + return 0; +} void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) @@ -239,6 +252,10 @@ static inline bool kasan_enabled(void) { return false; } +static inline slab_flags_t kasan_never_merge(void) +{ + return 0; +} static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} -- cgit v1.2.3 From 2ca408d9c749c32288bc28725f9f12ba30299e8f Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 30 Nov 2020 17:30:59 -0500 Subject: fanotify: Fix sys_fanotify_mark() on native x86-32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") converted native x86-32 which take 64-bit arguments to use the compat handlers to allow conversion to passing args via pt_regs. sys_fanotify_mark() was however missed, as it has a general compat handler. Add a config option that will use the syscall wrapper that takes the split args for native 32-bit. [ bp: Fix typo in Kconfig help text. ] Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") Reported-by: Paweł Jasiak Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Acked-by: Jan Kara Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20201130223059.101286-1-brgerst@gmail.com --- include/linux/syscalls.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f3929aff39cf..7688bc983de5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ +/* For split 64-bit arguments on 32-bit architectures */ +#ifdef __LITTLE_ENDIAN +#define SC_ARG64(name) u32, name##_lo, u32, name##_hi +#else +#define SC_ARG64(name) u32, name##_hi, u32, name##_lo +#endif +#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) + +#ifdef CONFIG_COMPAT +#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 +#else +#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6 +#endif + /* * Called before coming back to user-mode. Returning to user-mode with an * address limit different than USER_DS can allow to overwrite kernel memory. -- cgit v1.2.3 From 664f1e259a982bf213f0cd8eea7616c89546585c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 16 Dec 2020 18:40:05 +0100 Subject: libceph: add __maybe_unused to DEFINE_MSGR2_FEATURE Avoid -Wunused-const-variable warnings for "make W=1". Reported-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/msgr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index f5e02f6c0655..3989dcb94d3d 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -33,8 +33,8 @@ #define CEPH_MSGR2_INCARNATION_1 (0ull) #define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \ - static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ - static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \ + static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ + static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name = \ (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation); #define HAVE_MSGR2_FEATURE(x, name) \ -- cgit v1.2.3 From 3a176b94609a18f5f8bac7ddbf8923bd737262db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 29 Dec 2020 15:14:28 -0800 Subject: Revert "kbuild: avoid static_assert for genksyms" This reverts commit 14dc3983b5dff513a90bd5a8cc90acaf7867c3d0. Macro Elver had sent a fix proper fix earlier, and also pointed out corner cases: "I guess what you propose is simpler, but might still have corner cases where we still get warnings. In particular, if some file (for whatever reason) does not include build_bug.h and uses a raw _Static_assert(), then we still get warnings. E.g. I see 1 user of raw _Static_assert() (drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h )." I believe the raw use of _Static_assert() should be allowed, so this should be fixed in genksyms. Even after commit 14dc3983b5df ("kbuild: avoid static_assert for genksyms"), I confirmed the following test code emits the warning. ---------------->8---------------- #include _Static_assert((1 ?: 0), ""); void foo(void) { } EXPORT_SYMBOL(foo); ---------------->8---------------- WARNING: modpost: EXPORT symbol "foo" [vmlinux] version generation failed, symbol will not be versioned. Now that commit 869b91992bce ("genksyms: Ignore module scoped _Static_assert()") fixed this issue properly, the workaround should be reverted. Link: https://lkml.org/lkml/2020/12/10/845 Link: https://lkml.kernel.org/r/20201219183911.181442-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 7bb66e15b481..e3a0be2c90ad 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,9 +77,4 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.2.3 From 6d87d0ece58bc0022ca5247721a8eb06ef66b673 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Tue, 29 Dec 2020 15:14:34 -0800 Subject: mm: add prototype for __add_to_page_cache_locked() Otherwise it causes a gcc warning: mm/filemap.c:830:14: warning: no previous prototype for `__add_to_page_cache_locked' [-Wmissing-prototypes] A previous attempt to make this function static led to compilation errors when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() is referred to by BPF code. Adding a prototype will silence the warning. Link: https://lkml.kernel.org/r/1608693702-4665-1-git-send-email-jrdr.linux@gmail.com Signed-off-by: Souptick Joarder Cc: Alex Shi Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5299b90a6c40..c1e908184442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -216,6 +216,13 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +/* + * Any attempt to mark this function as static leads to build failure + * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() + * is referred to by BPF code. This must be visible for error injection. + */ +int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp, void **shadowp); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -- cgit v1.2.3 From dc2da7b45ffe954a0090f5d0310ed7b0b37d2bd2 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 29 Dec 2020 15:14:37 -0800 Subject: mm: memmap defer init doesn't work as expected VMware observed a performance regression during memmap init on their platform, and bisected to commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") causing it. Before the commit: [0.033176] Normal zone: 1445888 pages used for memmap [0.033176] Normal zone: 89391104 pages, LIFO batch:63 [0.035851] ACPI: PM-Timer IO Port: 0x448 With commit [0.026874] Normal zone: 1445888 pages used for memmap [0.026875] Normal zone: 89391104 pages, LIFO batch:63 [2.028450] ACPI: PM-Timer IO Port: 0x448 The root cause is the current memmap defer init doesn't work as expected. Before, memmap_init_zone() was used to do memmap init of one whole zone, to initialize all low zones of one numa node, but defer memmap init of the last zone in that numa node. However, since commit 73a6e474cb376, function memmap_init() is adapted to iterater over memblock regions inside one zone, then call memmap_init_zone() to do memmap init for each region. E.g, on VMware's system, the memory layout is as below, there are two memory regions in node 2. The current code will mistakenly initialize the whole 1st region [mem 0xab00000000-0xfcffffffff], then do memmap defer to iniatialize only one memmory section on the 2nd region [mem 0x10000000000-0x1033fffffff]. In fact, we only expect to see that there's only one memory section's memmap initialized. That's why more time is costed at the time. [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff] [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff] [ 0.008843] ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x55ffffffff] [ 0.008844] ACPI: SRAT: Node 1 PXM 1 [mem 0x5600000000-0xaaffffffff] [ 0.008844] ACPI: SRAT: Node 2 PXM 2 [mem 0xab00000000-0xfcffffffff] [ 0.008845] ACPI: SRAT: Node 2 PXM 2 [mem 0x10000000000-0x1033fffffff] Now, let's add a parameter 'zone_end_pfn' to memmap_init_zone() to pass down the real zone end pfn so that defer_init() can use it to judge whether defer need be taken in zone wide. Link: https://lkml.kernel.org/r/20201223080811.16211-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20201223080811.16211-2-bhe@redhat.com Fixes: commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") Signed-off-by: Baoquan He Reported-by: Rahul Gopakumar Reviewed-by: Mike Rapoport Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c1e908184442..ecdf8a8cd6ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2439,8 +2439,9 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, - enum meminit_context, struct vmem_altmap *, int migratetype); +extern void memmap_init_zone(unsigned long, int, unsigned long, + unsigned long, unsigned long, enum meminit_context, + struct vmem_altmap *, int migratetype); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); -- cgit v1.2.3 From 8b0fac44bd1ff17016502b3c3533f5abb8456c65 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 29 Dec 2020 15:14:52 -0800 Subject: sizes.h: add SZ_8G/SZ_16G/SZ_32G macros Add these macros, since we can use them in drivers. Link: https://lkml.kernel.org/r/20201229072819.11183-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sizes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 9874f6f67537..1ac79bcee2bb 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -44,6 +44,9 @@ #define SZ_2G 0x80000000 #define SZ_4G _AC(0x100000000, ULL) +#define SZ_8G _AC(0x200000000, ULL) +#define SZ_16G _AC(0x400000000, ULL) +#define SZ_32G _AC(0x800000000, ULL) #define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */ -- cgit v1.2.3 From aa8c7db494d0a83ecae583aa193f1134ef25d506 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Dec 2020 15:14:55 -0800 Subject: kdev_t: always inline major/minor helper functions Silly GCC doesn't always inline these trivial functions. Fixes the following warning: arch/x86/kernel/sys_ia32.o: warning: objtool: cp_stat64()+0xd8: call to new_encode_dev() with UACCESS enabled Link: https://lkml.kernel.org/r/984353b44a4484d86ba9f73884b7306232e25e30.1608737428.git.jpoimboe@redhat.com Signed-off-by: Josh Poimboeuf Reported-by: Randy Dunlap Acked-by: Randy Dunlap [build-tested] Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index 85b5151911cf..4856706fbfeb 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -21,61 +21,61 @@ }) /* acceptable for old filesystems */ -static inline bool old_valid_dev(dev_t dev) +static __always_inline bool old_valid_dev(dev_t dev) { return MAJOR(dev) < 256 && MINOR(dev) < 256; } -static inline u16 old_encode_dev(dev_t dev) +static __always_inline u16 old_encode_dev(dev_t dev) { return (MAJOR(dev) << 8) | MINOR(dev); } -static inline dev_t old_decode_dev(u16 val) +static __always_inline dev_t old_decode_dev(u16 val) { return MKDEV((val >> 8) & 255, val & 255); } -static inline u32 new_encode_dev(dev_t dev) +static __always_inline u32 new_encode_dev(dev_t dev) { unsigned major = MAJOR(dev); unsigned minor = MINOR(dev); return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); } -static inline dev_t new_decode_dev(u32 dev) +static __always_inline dev_t new_decode_dev(u32 dev) { unsigned major = (dev & 0xfff00) >> 8; unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); return MKDEV(major, minor); } -static inline u64 huge_encode_dev(dev_t dev) +static __always_inline u64 huge_encode_dev(dev_t dev) { return new_encode_dev(dev); } -static inline dev_t huge_decode_dev(u64 dev) +static __always_inline dev_t huge_decode_dev(u64 dev) { return new_decode_dev(dev); } -static inline int sysv_valid_dev(dev_t dev) +static __always_inline int sysv_valid_dev(dev_t dev) { return MAJOR(dev) < (1<<14) && MINOR(dev) < (1<<18); } -static inline u32 sysv_encode_dev(dev_t dev) +static __always_inline u32 sysv_encode_dev(dev_t dev) { return MINOR(dev) | (MAJOR(dev) << 18); } -static inline unsigned sysv_major(u32 dev) +static __always_inline unsigned sysv_major(u32 dev) { return (dev >> 18) & 0x3fff; } -static inline unsigned sysv_minor(u32 dev) +static __always_inline unsigned sysv_minor(u32 dev) { return dev & 0x3ffff; } -- cgit v1.2.3 From e89eed02a5f1b864fa5abafc8e8e71bd9fd66d1f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 5 Jan 2021 20:53:42 +0100 Subject: kcov, usb: hide in_serving_softirq checks in __usb_hcd_giveback_urb Done opencode in_serving_softirq() checks in in_serving_softirq() to avoid cluttering the code, hide them in kcov helpers instead. Fixes: aee9ddb1d371 ("kcov, usb: only collect coverage from __usb_hcd_giveback_urb in softirq") Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/aeb430c5bb90b0ccdf1ec302c70831c1a47b9c45.1609876340.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/kcov.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index a10e84707d82..4e3037dc1204 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -52,6 +52,25 @@ static inline void kcov_remote_start_usb(u64 id) kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); } +/* + * The softirq flavor of kcov_remote_*() functions is introduced as a temporary + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding suport for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 + */ + +static inline void kcov_remote_start_usb_softirq(u64 id) +{ + if (in_serving_softirq()) + kcov_remote_start_usb(id); +} + +static inline void kcov_remote_stop_softirq(void) +{ + if (in_serving_softirq()) + kcov_remote_stop(); +} + #else static inline void kcov_task_init(struct task_struct *t) {} @@ -66,6 +85,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- cgit v1.2.3 From 9ad9f45b3b91162b33abfe175ae75ab65718dbf5 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:55 +0800 Subject: iommu/vt-d: Move intel_iommu info from struct intel_svm to struct intel_svm_dev 'struct intel_svm' is shared by all devices bound to a give process, but records only a single pointer to a 'struct intel_iommu'. Consequently, cache invalidations may only be applied to a single DMAR unit, and are erroneously skipped for the other devices. In preparation for fixing this, rework the structures so that the iommu pointer resides in 'struct intel_svm_dev', allowing 'struct intel_svm' to track them in its device list. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Cc: Lu Baolu Cc: Jacob Pan Cc: Raj Ashok Cc: David Woodhouse Reported-by: Guo Kaijie Reported-by: Xin Zeng Signed-off-by: Guo Kaijie Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Tested-by: Guo Kaijie Cc: stable@vger.kernel.org # v5.0+ Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-2-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d956987ed032..94522685a0d9 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -758,6 +758,7 @@ struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct intel_iommu *iommu; struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; @@ -771,7 +772,6 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; - struct intel_iommu *iommu; unsigned int flags; u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ -- cgit v1.2.3 From 18abda7a2d555783d28ea1701f3ec95e96237a86 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:56 +0800 Subject: iommu/vt-d: Fix general protection fault in aux_detach_device() The aux-domain attach/detach are not tracked, some data structures might be used after free. This causes general protection faults when multiple subdevices are created and assigned to a same guest machine: | general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] SMP NOPTI | RIP: 0010:intel_iommu_aux_detach_device+0x12a/0x1f0 | [...] | Call Trace: | iommu_aux_detach_device+0x24/0x70 | vfio_mdev_detach_domain+0x3b/0x60 | ? vfio_mdev_set_domain+0x50/0x50 | iommu_group_for_each_dev+0x4f/0x80 | vfio_iommu_detach_group.isra.0+0x22/0x30 | vfio_iommu_type1_detach_group.cold+0x71/0x211 | ? find_exported_symbol_in_section+0x4a/0xd0 | ? each_symbol_section+0x28/0x50 | __vfio_group_unset_container+0x4d/0x150 | vfio_group_try_dissolve_container+0x25/0x30 | vfio_group_put_external_user+0x13/0x20 | kvm_vfio_group_put_external_user+0x27/0x40 [kvm] | kvm_vfio_destroy+0x45/0xb0 [kvm] | kvm_put_kvm+0x1bb/0x2e0 [kvm] | kvm_vm_release+0x22/0x30 [kvm] | __fput+0xcc/0x260 | ____fput+0xe/0x10 | task_work_run+0x8f/0xb0 | do_exit+0x358/0xaf0 | ? wake_up_state+0x10/0x20 | ? signal_wake_up_state+0x1a/0x30 | do_group_exit+0x47/0xb0 | __x64_sys_exit_group+0x18/0x20 | do_syscall_64+0x57/0x1d0 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix the crash by tracking the subdevices when attaching and detaching aux-domains. Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach") Co-developed-by: Xin Zeng Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-3-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- include/linux/intel-iommu.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 94522685a0d9..09c6a0bf3892 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -533,11 +533,10 @@ struct dmar_domain { /* Domain ids per IOMMU. Use u16 since * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ - unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ bool has_iotlb_device; struct list_head devices; /* all devices' list */ - struct list_head auxd; /* link to device's auxiliary list */ + struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -610,14 +609,21 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; }; +/* Per subdevice private data */ +struct subdev_domain_info { + struct list_head link_phys; /* link to phys device siblings */ + struct list_head link_domain; /* link to domain siblings */ + struct device *pdev; /* physical device derived from */ + struct dmar_domain *domain; /* aux-domain */ + int users; /* user count */ +}; + /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head auxiliary_domains; /* auxiliary domains - * attached to this device - */ + struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ -- cgit v1.2.3 From ee61cfd955a64a58ed35cbcfc54068fcbd486945 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 31 Dec 2020 19:35:25 +0800 Subject: ACPI: scan: add stub acpi_create_platform_device() for !CONFIG_ACPI It adds a stub acpi_create_platform_device() for !CONFIG_ACPI build, so that caller doesn't have to deal with !CONFIG_ACPI build issue. Reported-by: kernel test robot Signed-off-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2630c2e953f7..053bf05fb1f7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -885,6 +885,13 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } +static inline struct platform_device * +acpi_create_platform_device(struct acpi_device *adev, + struct property_entry *properties) +{ + return NULL; +} + static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; -- cgit v1.2.3 From 9c9be85f6b59d80efe4705109c0396df18d4e11d Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 24 Nov 2020 22:16:23 +0200 Subject: net/mlx5e: Add missing capability check for uplink follow Expose firmware indication that it supports setting eswitch uplink state to follow (follow the physical link). Condition setting the eswitch uplink admin-state with this capability bit. Older FW may not support the uplink state setting. Fixes: 7d0314b11cdd ("net/mlx5e: Modify uplink state on interface up/down") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8fbddec26eb8..442c0160caab 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1280,7 +1280,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ece_support[0x1]; u8 reserved_at_a4[0x7]; u8 log_max_srq[0x5]; - u8 reserved_at_b0[0x2]; + u8 reserved_at_b0[0x1]; + u8 uplink_follow[0x1]; u8 ts_cqe_to_dest_cqn[0x1]; u8 reserved_at_b3[0xd]; -- cgit v1.2.3 From a91bd6223ecd46addc71ee6fcd432206d39365d2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 8 Jan 2021 12:48:47 +0100 Subject: Revert "init/console: Use ttynull as a fallback when there is no console" This reverts commit 757055ae8dedf5333af17b3b5b4b70ba9bc9da4e. The commit caused that ttynull was used as the default console on several systems[1][2][3]. As a result, the console was blank even when a better alternative existed. It happened when there was no console configured on the command line and ttynull_init() was the first initcall calling register_console(). Or it happened when /dev/ did not exist when console_on_rootfs() was called. It was not able to open /dev/console even though a console driver was registered. It tried to add ttynull console but it obviously did not help. But ttynull became the preferred console and was used by /dev/console when it was available later. The commit tried to fix a historical problem that have been there for ages. The primary motivation was the commit 3cffa06aeef7ece30f6 ("printk/console: Allow to disable console output by using console="" or console=null"). It provided a clean solution for a workaround that was widely used and worked only by chance. This revert causes that the console="" or console=null command line options will again work only by chance. These options will cause that a particular console will be preferred and the default (tty) ones will not get enabled. There will be no console registered at all. As a result there won't be stdin, stdout, and stderr for the init process. But it worked exactly this way even before. The proper solution has to fulfill many conditions: + Register ttynull only when explicitly required or as the ultimate fallback. + ttynull should get associated with /dev/console but it must not become preferred console when used as a fallback. Especially, it must still be possible to replace it by a better console later. Such a change requires clean up of the register_console() code. Otherwise, it would be even harder to follow. Especially, the use of has_preferred_console and CON_CONSDEV flag is tricky. The clean up is risky. The ordering of consoles is not well defined. And any changes tend to break existing user settings. Do the revert at the least risky solution for now. [1] https://lore.kernel.org/linux-kselftest/20201221144302.GR4077@smile.fi.intel.com/ [2] https://lore.kernel.org/lkml/d2a3b3c0-e548-7dd1-730f-59bc5c04e191@synopsys.com/ [3] https://patchwork.ozlabs.org/project/linux-um/patch/20210105120128.10854-1-thomas@m3y3r.de/ Reported-by: Andy Shevchenko Reported-by: Vineet Gupta Reported-by: Thomas Meyer Signed-off-by: Petr Mladek Acked-by: Greg Kroah-Hartman Acked-by: Sergey Senozhatsky Signed-off-by: Linus Torvalds --- include/linux/console.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index dbe78e8e2602..20874db50bc8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -186,12 +186,9 @@ extern int braille_register_console(struct console *, int index, extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); -extern void register_ttynull_console(void); #else static inline void console_sysfs_notify(void) { } -static inline void register_ttynull_console(void) -{ } #endif extern bool console_suspend_enabled; -- cgit v1.2.3 From 9b5948267adc9e689da609eb61cf7ed49cae5fa8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Jan 2021 11:15:56 -0500 Subject: dm integrity: fix flush with external metadata device With external metadata device, flush requests are not passed down to the data device. Fix this by submitting the flush request in dm_integrity_flush_buffers. In order to not degrade performance, we overlap the data device flush with the metadata device flush. Reported-by: Lukas Straub Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- include/linux/dm-bufio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 29d255fdd5d6..90bd558a17f5 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); sector_t dm_bufio_get_block_number(struct dm_buffer *b); void *dm_bufio_get_block_data(struct dm_buffer *b); void *dm_bufio_get_aux_data(struct dm_buffer *b); -- cgit v1.2.3 From 29766bcffad03da66892bef82674883e31f78fec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:32 -0500 Subject: net: support kmap_local forced debugging in skb_frag_foreach Skb frags may be backed by highmem and/or compound pages. Highmem pages need kmap_atomic mappings to access. But kmap_atomic maps a single page, not the entire compound page. skb_foreach_page iterates over an skb frag, in one step in the common case, page by page only if kmap_atomic must be called for each page. The decision logic is captured in skb_frag_must_loop. CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP extends kmap from highmem to all pages, to increase code coverage. Extend skb_frag_must_loop to this new condition. Link: https://lore.kernel.org/linux-mm/20210106180132.41dc249d@gandalf.local.home/ Fixes: 0e91a0c6984c ("mm/highmem: Provide CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP") Reported-by: Steven Rostedt (VMware) Signed-off-by: Linus Torvalds Signed-off-by: Willem de Bruijn Tested-by: Steven Rostedt (VMware) Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 333bcdc39635..c858adfb5a82 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -366,7 +366,7 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) - if (PageHighMem(p)) + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) return true; #endif return false; -- cgit v1.2.3 From 97550f6fa59254435d864b92603de3ca4b5a99f8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:33 -0500 Subject: net: compound page support in skb_seq_read skb_seq_read iterates over an skb, returning pointer and length of the next data range with each call. It relies on kmap_atomic to access highmem pages when needed. An skb frag may be backed by a compound page, but kmap_atomic maps only a single page. There are not enough kmap slots to always map all pages concurrently. Instead, if kmap_atomic is needed, iterate over each page. As this increases the number of calls, avoid this unless needed. The necessary condition is captured in skb_frag_must_loop. I tried to make the change as obvious as possible. It should be easy to verify that nothing changes if skb_frag_must_loop returns false. Tested: On an x86 platform with CONFIG_HIGHMEM=y CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y CONFIG_NETFILTER_XT_MATCH_STRING=y Run ip link set dev lo mtu 1500 iptables -A OUTPUT -m string --string 'badstring' -algo bm -j ACCEPT dd if=/dev/urandom of=in bs=1M count=20 nc -l -p 8000 > /dev/null & nc -w 1 -q 0 localhost 8000 < in Signed-off-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c858adfb5a82..5f60c9e907c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,7 @@ struct skb_seq_state { struct sk_buff *root_skb; struct sk_buff *cur_skb; __u8 *frag_data; + __u32 frag_off; }; void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, -- cgit v1.2.3 From aba428a0c612bb259891307da12e22efd0fab14c Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Tue, 1 Dec 2020 17:52:31 +0800 Subject: timekeeping: Remove unused get_seconds() The get_seconds() cleanup seems to have been completed, now it is time to delete the legacy interface to avoid misuse later. Signed-off-by: Chunguang Xu Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/1606816351-26900-1-git-send-email-brookxu@tencent.com --- include/linux/ktime.h | 1 - include/linux/timekeeping32.h | 14 -------------- 2 files changed, 15 deletions(-) delete mode 100644 include/linux/timekeeping32.h (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index a12b5523cc18..73f20deb497d 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -230,6 +230,5 @@ static inline ktime_t ms_to_ktime(u64 ms) } # include -# include #endif diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h deleted file mode 100644 index 266017fc9ee9..000000000000 --- a/include/linux/timekeeping32.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _LINUX_TIMEKEEPING32_H -#define _LINUX_TIMEKEEPING32_H -/* - * These interfaces are all based on the old timespec type - * and should get replaced with the timespec64 based versions - * over time so we can remove the file here. - */ - -static inline unsigned long get_seconds(void) -{ - return ktime_get_real_seconds(); -} - -#endif -- cgit v1.2.3 From 7ea510b92c7c9b4eb5ff72e6b4bbad4b0407a914 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 12 Jan 2021 15:49:11 -0800 Subject: mm/memcontrol: fix warning in mem_cgroup_page_lruvec() Boot a CONFIG_MEMCG=y kernel with "cgroup_disabled=memory" and you are met by a series of warnings from the VM_WARN_ON_ONCE_PAGE(!memcg, page) recently added to the inline mem_cgroup_page_lruvec(). An earlier attempt to place that warning, in mem_cgroup_lruvec(), had been careful to do so after weeding out the mem_cgroup_disabled() case; but was itself invalid because of the mem_cgroup_lruvec(NULL, pgdat) in clear_pgdat_congested() and age_active_anon(). Warning in mem_cgroup_page_lruvec() was once useful in detecting a KSM charge bug, so may be worth keeping: but skip if mem_cgroup_disabled(). Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101032056260.1093@eggly.anvils Fixes: 9a1ac2288cf1 ("mm/memcontrol:rewrite mem_cgroup_page_lruvec()") Signed-off-by: Hugh Dickins Reviewed-by: Alex Shi Acked-by: Roman Gushchin Acked-by: Chris Down Reviewed-by: Baoquan He Acked-by: Vlastimil Babka Cc: Hui Su Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Johannes Weiner Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d827bd7f3bfe..eeb0b52203e9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -665,7 +665,7 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, { struct mem_cgroup *memcg = page_memcg(page); - VM_WARN_ON_ONCE_PAGE(!memcg, page); + VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); return mem_cgroup_lruvec(memcg, pgdat); } -- cgit v1.2.3 From 29970dc24faf0078beb4efab5455b4f504d2198d Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Tue, 12 Jan 2021 15:49:14 -0800 Subject: arm/kasan: fix the array size of kasan_early_shadow_pte[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of kasan_early_shadow_pte[] now is PTRS_PER_PTE which defined to 512 for arm. This means that it only covers the prev Linux pte entries, but not the HWTABLE pte entries for arm. The reason it currently works is that the symbol kasan_early_shadow_page immediately following kasan_early_shadow_pte in memory is page aligned, which makes kasan_early_shadow_pte look like a 4KB size array. But we can't ensure the order is always right with different compiler/linker, or if more bss symbols are introduced. We had a test with QEMU + vexpress:put a 512KB-size symbol with attribute __section(".bss..page_aligned") after kasan_early_shadow_pte, and poisoned it after kasan_early_init(). Then enabled CONFIG_KASAN, it failed to boot up. Link: https://lkml.kernel.org/r/20210109044622.8312-1-hailongliiu@yeah.net Signed-off-by: Hailong Liu Signed-off-by: Ziliang Guo Reviewed-by: Linus Walleij Cc: Andrey Ryabinin Cc: Russell King Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ard Biesheuvel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5e0655fb2a6f..fe1ae73ff8b5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -35,8 +35,12 @@ struct kunit_kasan_expectation { #define KASAN_SHADOW_INIT 0 #endif +#ifndef PTE_HWTABLE_PTRS +#define PTE_HWTABLE_PTRS 0 +#endif + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; -extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; +extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; -- cgit v1.2.3 From b90d72a6bfdb5e5c62cd223a8cdf4045bfbcb94d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:18:55 +0000 Subject: Revert "arm64: Enable perf events based hard lockup detector" This reverts commit 367c820ef08082e68df8a3bc12e62393af21e4b5. lockup_detector_init() makes heavy use of per-cpu variables and must be called with preemption disabled. Usually, it's handled early during boot in kernel_init_freeable(), before SMP has been initialised. Since we do not know whether or not our PMU interrupt can be signalled as an NMI until considerably later in the boot process, the Arm PMU driver attempts to re-initialise the lockup detector off the back of a device_initcall(). Unfortunately, this is called from preemptible context and results in the following splat: | BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 | caller is debug_smp_processor_id+0x20/0x2c | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #276 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x3c0 | show_stack+0x20/0x6c | dump_stack+0x2f0/0x42c | check_preemption_disabled+0x1cc/0x1dc | debug_smp_processor_id+0x20/0x2c | hardlockup_detector_event_create+0x34/0x18c | hardlockup_detector_perf_init+0x2c/0x134 | watchdog_nmi_probe+0x18/0x24 | lockup_detector_init+0x44/0xa8 | armv8_pmu_driver_init+0x54/0x78 | do_one_initcall+0x184/0x43c | kernel_init_freeable+0x368/0x380 | kernel_init+0x1c/0x1cc | ret_from_fork+0x10/0x30 Rather than bodge this with raw_smp_processor_id() or randomly disabling preemption, simply revert the culprit for now until we figure out how to do this properly. Reported-by: Lecopzer Chen Signed-off-by: Will Deacon Acked-by: Mark Rutland Cc: Sumit Garg Cc: Alexandru Elisei Link: https://lore.kernel.org/r/20201221162249.3119-1-lecopzer.chen@mediatek.com Link: https://lore.kernel.org/r/20210112221855.10666-1-will@kernel.org Signed-off-by: Catalin Marinas --- include/linux/perf/arm_pmu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index bf7966776c55..505480217cf1 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,8 +163,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif -bool arm_pmu_irq_is_nmi(void); - /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- cgit v1.2.3 From 5e6dca82bcaa49348f9e5fcb48df4881f6d6c4ae Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 12 Jan 2021 11:46:24 -0800 Subject: x86/entry: Emit a symbol for register restoring thunk Arnd found a randconfig that produces the warning: arch/x86/entry/thunk_64.o: warning: objtool: missing symbol for insn at offset 0x3e when building with LLVM_IAS=1 (Clang's integrated assembler). Josh notes: With the LLVM assembler not generating section symbols, objtool has no way to reference this code when it generates ORC unwinder entries, because this code is outside of any ELF function. The limitation now being imposed by objtool is that all code must be contained in an ELF symbol. And .L symbols don't create such symbols. So basically, you can use an .L symbol *inside* a function or a code segment, you just can't use the .L symbol to contain the code using a SYM_*_START/END annotation pair. Fangrui notes that this optimization is helpful for reducing image size when compiling with -ffunction-sections and -fdata-sections. I have observed on the order of tens of thousands of symbols for the kernel images built with those flags. A patch has been authored against GNU binutils to match this behavior of not generating unused section symbols ([1]), so this will also become a problem for users of GNU binutils once they upgrade to 2.36. Omit the .L prefix on a label so that the assembler will emit an entry into the symbol table for the label, with STB_LOCAL binding. This enables objtool to generate proper unwind info here with LLVM_IAS=1 or GNU binutils 2.36+. [ bp: Massage commit message. ] Reported-by: Arnd Bergmann Suggested-by: Josh Poimboeuf Suggested-by: Borislav Petkov Suggested-by: Mark Brown Signed-off-by: Nick Desaulniers Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210112194625.4181814-1-ndesaulniers@google.com Link: https://github.com/ClangBuiltLinux/linux/issues/1209 Link: https://reviews.llvm.org/D93783 Link: https://sourceware.org/binutils/docs/as/Symbol-Names.html Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d1bcae833b32f1408485ce69f844dcd7ded093a8 [1] --- include/linux/linkage.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5bcfbd972e97..dbf8506decca 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -178,6 +178,11 @@ * Objtool generates debug info for both FUNC & CODE, but needs special * annotations for each CODE's start (to describe the actual stack frame). * + * Objtool requires that all code must be contained in an ELF symbol. Symbol + * names that have a .L prefix do not emit symbol table entries. .L + * prefixed symbols can be used within a code region, but should be avoided for + * denoting a range of code via ``SYM_*_START/END`` annotations. + * * ALIAS -- does not generate debug info -- the aliased function will */ -- cgit v1.2.3 From dca5244d2f5b94f1809f0c02a549edf41ccd5493 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:48:32 +0000 Subject: compiler.h: Raise minimum version of GCC to 5.1 for arm64 GCC versions >= 4.9 and < 5.1 have been shown to emit memory references beyond the stack pointer, resulting in memory corruption if an interrupt is taken after the stack pointer has been adjusted but before the reference has been executed. This leads to subtle, infrequent data corruption such as the EXT4 problems reported by Russell King at the link below. Life is too short for buggy compilers, so raise the minimum GCC version required by arm64 to 5.1. Reported-by: Russell King Suggested-by: Arnd Bergmann Signed-off-by: Will Deacon Tested-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Linus Torvalds Cc: Cc: Theodore Ts'o Cc: Florian Weimer Cc: Peter Zijlstra Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20210105154726.GD1551@shell.armlinux.org.uk Link: https://lore.kernel.org/r/20210112224832.10980-1-will@kernel.org Signed-off-by: Catalin Marinas --- include/linux/compiler-gcc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 74c6c0486eed..555ab0fddbef 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -13,6 +13,12 @@ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ #if GCC_VERSION < 40900 # error Sorry, your version of GCC is too old - please use 4.9 or newer. +#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100 +/* + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 + * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk + */ +# error Sorry, your version of GCC is too old - please use 5.1 or newer. #endif /* -- cgit v1.2.3 From 20d3bb92e84d417b0494a3b6867f0c86713db257 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 15 Jan 2021 07:30:46 +0100 Subject: nvme-pci: allow use of cmb on v1.4 controllers Since NVMe v1.4 the Controller Memory Buffer must be explicitly enabled by the host. Signed-off-by: Klaus Jensen [hch: avoid a local variable and add a comment] Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d92535997687..bfed36e342cc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -116,6 +116,9 @@ enum { NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer * Location */ + NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory + * Space Control + */ NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ @@ -135,6 +138,7 @@ enum { #define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff) #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) +#define NVME_CAP_CMBS(cap) (((cap) >> 57) & 0x1) #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) @@ -192,6 +196,8 @@ enum { NVME_CSTS_SHST_OCCUR = 1 << 2, NVME_CSTS_SHST_CMPLT = 2 << 2, NVME_CSTS_SHST_MASK = 3 << 2, + NVME_CMBMSC_CRE = 1 << 0, + NVME_CMBMSC_CMSE = 1 << 1, }; struct nvme_id_power_state { -- cgit v1.2.3 From 8eed01b5ca9c1deff329ad44f08e2041ca14842c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 18 Jan 2021 16:06:55 +0100 Subject: mdio-bitbang: Export mdiobb_{read,write}() Export mdiobb_read() and mdiobb_write(), so Ethernet controller drivers can call them from their MDIO read/write wrappers. Signed-off-by: Geert Uytterhoeven Tested-by: Wolfram Sang Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/mdio-bitbang.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index 5d71e8a8500f..aca4dc037b70 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -35,6 +35,9 @@ struct mdiobb_ctrl { const struct mdiobb_ops *ops; }; +int mdiobb_read(struct mii_bus *bus, int phy, int reg); +int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val); + /* The returned bus is not yet registered with the phy layer. */ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl); -- cgit v1.2.3 From de641d74fb00f5b32f054ee154e31fb037e0db88 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 17 Jan 2021 11:26:33 +0200 Subject: Revert "RDMA/mlx5: Fix devlink deadlock on net namespace deletion" This reverts commit fbdd0049d98d44914fc57d4b91f867f4996c787b. Due to commit in fixes tag, netdevice events were received only in one net namespace of mlx5_core_dev. Due to this when netdevice events arrive in net namespace other than net namespace of mlx5_core_dev, they are missed. This results in empty GID table due to RDMA device being detached from its net device. Hence, revert back to receive netdevice events in all net namespaces to restore back RDMA functionality in non init_net net namespace. The deadlock will have to be addressed in another patch. Fixes: fbdd0049d98d ("RDMA/mlx5: Fix devlink deadlock on net namespace deletion") Link: https://lore.kernel.org/r/20210117092633.10690-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa..4672e12f1aa5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1209,22 +1209,4 @@ static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) return val.vbool; } -/** - * mlx5_core_net - Provide net namespace of the mlx5_core_dev - * @dev: mlx5 core device - * - * mlx5_core_net() returns the net namespace of mlx5 core device. - * This can be called only in below described limited context. - * (a) When a devlink instance for mlx5_core is registered and - * when devlink reload operation is disabled. - * or - * (b) during devlink reload reload_down() and reload_up callbacks - * where it is ensured that devlink instance's net namespace is - * stable. - */ -static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) -{ - return devlink_net(priv_to_devlink(dev)); -} - #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From de658a195ee23ca6aaffe197d1d2ea040beea0a2 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Tue, 19 Jan 2021 17:12:08 -0800 Subject: net: usb: cdc_ncm: don't spew notifications RTL8156 sends notifications about every 32ms. Only display/log notifications when something changes. This issue has been reported by others: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1832472 https://lkml.org/lkml/2020/8/27/1083 ... [785962.779840] usb 1-1: new high-speed USB device number 5 using xhci_hcd [785962.929944] usb 1-1: New USB device found, idVendor=0bda, idProduct=8156, bcdDevice=30.00 [785962.929949] usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=6 [785962.929952] usb 1-1: Product: USB 10/100/1G/2.5G LAN [785962.929954] usb 1-1: Manufacturer: Realtek [785962.929956] usb 1-1: SerialNumber: 000000001 [785962.991755] usbcore: registered new interface driver cdc_ether [785963.017068] cdc_ncm 1-1:2.0: MAC-Address: 00:24:27:88:08:15 [785963.017072] cdc_ncm 1-1:2.0: setting rx_max = 16384 [785963.017169] cdc_ncm 1-1:2.0: setting tx_max = 16384 [785963.017682] cdc_ncm 1-1:2.0 usb0: register 'cdc_ncm' at usb-0000:00:14.0-1, CDC NCM, 00:24:27:88:08:15 [785963.019211] usbcore: registered new interface driver cdc_ncm [785963.023856] usbcore: registered new interface driver cdc_wdm [785963.025461] usbcore: registered new interface driver cdc_mbim [785963.038824] cdc_ncm 1-1:2.0 enx002427880815: renamed from usb0 [785963.089586] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected [785963.121673] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected [785963.153682] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected ... This is about 2KB per second and will overwrite all contents of a 1MB dmesg buffer in under 10 minutes rendering them useless for debugging many kernel problems. This is also an extra 180 MB/day in /var/logs (or 1GB per week) rendering the majority of those logs useless too. When the link is up (expected state), spew amount is >2x higher: ... [786139.600992] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.632997] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink [786139.665097] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.697100] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink [786139.729094] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.761108] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink ... Chrome OS cannot support RTL8156 until this is fixed. Signed-off-by: Grant Grundler Reviewed-by: Hayes Wang Link: https://lore.kernel.org/r/20210120011208.3768105-1-grundler@chromium.org Signed-off-by: Jakub Kicinski --- include/linux/usb/usbnet.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 88a7673894d5..cfbfd6fe01df 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -81,6 +81,8 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 + u32 rx_speed; /* in bps - NOT Mbps */ + u32 tx_speed; /* in bps - NOT Mbps */ }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3 From 51dfb6ca3728bd0a0a3c23776a12d2a15a1d2457 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 20 Jan 2021 23:58:44 +0300 Subject: regulator: consumer: Add missing stubs to regulator/consumer.h Add missing stubs to regulator/consumer.h in order to fix COMPILE_TEST of the kernel. In particular this should fix compile-testing of OPP core because of a missing stub for regulator_sync_voltage(). Reported-by: kernel test robot Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210120205844.12658-1-digetx@gmail.com Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 2024944fd2f7..20e84a84fb77 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -331,6 +331,12 @@ regulator_get_exclusive(struct device *dev, const char *id) return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check +devm_regulator_get_exclusive(struct device *dev, const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline struct regulator *__must_check regulator_get_optional(struct device *dev, const char *id) { @@ -486,6 +492,11 @@ static inline int regulator_get_voltage(struct regulator *regulator) return -EINVAL; } +static inline int regulator_sync_voltage(struct regulator *regulator) +{ + return -EINVAL; +} + static inline int regulator_is_supported_voltage(struct regulator *regulator, int min_uV, int max_uV) { @@ -578,6 +589,25 @@ static inline int devm_regulator_unregister_notifier(struct regulator *regulator return 0; } +static inline int regulator_suspend_enable(struct regulator_dev *rdev, + suspend_state_t state) +{ + return -EINVAL; +} + +static inline int regulator_suspend_disable(struct regulator_dev *rdev, + suspend_state_t state) +{ + return -EINVAL; +} + +static inline int regulator_set_suspend_voltage(struct regulator *regulator, + int min_uV, int max_uV, + suspend_state_t state) +{ + return -EINVAL; +} + static inline void *regulator_get_drvdata(struct regulator *regulator) { return NULL; -- cgit v1.2.3 From e020ff611ba9be54e959e6b548038f8a020da1c9 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Sun, 10 Jan 2021 09:54:07 -0800 Subject: driver core: Fix device link device name collision The device link device's name was of the form: -- This can cause name collision as reported here [1] as device names are not globally unique. Since device names have to be unique within the bus/class, add the bus/class name as a prefix to the device names used to construct the device link device name. So the devuce link device's name will be of the form: :--: [1] - https://lore.kernel.org/lkml/20201229033440.32142-1-michael@walle.cc/ Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs") Cc: stable@vger.kernel.org Reported-by: Michael Walle Tested-by: Michael Walle Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210110175408.1465657-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 89bb8b84173e..1779f90eeb4c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -609,6 +609,18 @@ static inline const char *dev_name(const struct device *dev) return kobject_name(&dev->kobj); } +/** + * dev_bus_name - Return a device's bus/class name, if at all possible + * @dev: struct device to get the bus/class name of + * + * Will return the name of the bus/class the device is attached to. If it is + * not attached to a bus/class, an empty string will be returned. + */ +static inline const char *dev_bus_name(const struct device *dev) +{ + return dev->bus ? dev->bus->name : (dev->class ? dev->class->name : ""); +} + __printf(2, 3) int dev_set_name(struct device *dev, const char *name, ...); #ifdef CONFIG_NUMA -- cgit v1.2.3 From ac687e6e8c26181a33270efd1a2e2241377924b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Jan 2021 11:24:04 +0100 Subject: kthread: Extract KTHREAD_IS_PER_CPU There is a need to distinguish geniune per-cpu kthreads from kthreads that happen to have a single CPU affinity. Geniune per-cpu kthreads are kthreads that are CPU affine for correctness, these will obviously have PF_KTHREAD set, but must also have PF_NO_SETAFFINITY set, lest userspace modify their affinity and ruins things. However, these two things are not sufficient, PF_NO_SETAFFINITY is also set on other tasks that have their affinities controlled through other means, like for instance workqueues. Therefore another bit is needed; it turns out kthread_create_per_cpu() already has such a bit: KTHREAD_IS_PER_CPU, which is used to make kthread_park()/kthread_unpark() work correctly. Expose this flag and remove the implicit setting of it from kthread_create_on_cpu(); the io_uring usage of it seems dubious at best. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103506.557620262@infradead.org --- include/linux/kthread.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 65b81e0c494d..2484ed97e72f 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,6 +33,9 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt); +void kthread_set_per_cpu(struct task_struct *k, int cpu); +bool kthread_is_per_cpu(struct task_struct *k); + /** * kthread_run - create and wake a thread. * @threadfn: the function to run until signal_pending(current). -- cgit v1.2.3 From 9f12e37cae44a96132fc3031535a0b165486941a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 25 Jan 2021 11:09:25 -0800 Subject: Commit 9bb48c82aced ("tty: implement write_iter") converted the tty layer to use write_iter. Fix the redirected_tty_write declaration also in n_tty and change the comparisons to use write_iter instead of write. [ Also moved the declaration of redirected_tty_write() to the proper location in a header file. The reason for the bug was the bogus extern declaration in n_tty.c silently not matching the changed definition in tty_io.c, and because it wasn't in a shared header file, there was no cross-checking of the declaration. Sami noticed because Clang's Control Flow Integrity checking ended up incidentally noticing the inconsistent declaration. - Linus ] Fixes: 9bb48c82aced ("tty: implement write_iter") Signed-off-by: Sami Tolvanen Signed-off-by: Linus Torvalds --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index c873f475f0a7..37803f3e6d49 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,6 +421,7 @@ extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); extern void tty_ldisc_unlock(struct tty_struct *tty); +extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } -- cgit v1.2.3 From ba6dfce47c4d002d96cd02a304132fca76981172 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 21 Jan 2021 16:17:23 -0500 Subject: SUNRPC: Move simple_get_bytes and simple_get_netobj into private header Remove duplicated helper functions to parse opaque XDR objects and place inside new file net/sunrpc/auth_gss/auth_gss_internal.h. In the new file carry the license and copyright from the source file net/sunrpc/auth_gss/auth_gss.c. Finally, update the comment inside include/linux/sunrpc/xdr.h since lockd is not the only user of struct xdr_netobj. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 19b6dea27367..b26213ae8c1a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -25,8 +25,7 @@ struct rpc_rqst; #define XDR_QUADLEN(l) (((l) + 3) >> 2) /* - * Generic opaque `network object.' At the kernel level, this type - * is used only by lockd. + * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { -- cgit v1.2.3 From 97c753e62e6c31a404183898d950d8c08d752dbd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 28 Jan 2021 00:37:51 +0900 Subject: tracing/kprobe: Fix to support kretprobe events on unloaded modules Fix kprobe_on_func_entry() returns error code instead of false so that register_kretprobe() can return an appropriate error code. append_trace_kprobe() expects the kprobe registration returns -ENOENT when the target symbol is not found, and it checks whether the target module is unloaded or not. If the target module doesn't exist, it defers to probe the target symbol until the module is loaded. However, since register_kretprobe() returns -EINVAL instead of -ENOENT in that case, it always fail on putting the kretprobe event on unloaded modules. e.g. Kprobe event: /sys/kernel/debug/tracing # echo p xfs:xfs_end_io >> kprobe_events [ 16.515574] trace_kprobe: This probe might be able to register after target module is loaded. Continue. Kretprobe event: (p -> r) /sys/kernel/debug/tracing # echo r xfs:xfs_end_io >> kprobe_events sh: write error: Invalid argument /sys/kernel/debug/tracing # cat error_log [ 41.122514] trace_kprobe: error: Failed to register probe event Command: r xfs:xfs_end_io ^ To fix this bug, change kprobe_on_func_entry() to detect symbol lookup failure and return -ENOENT in that case. Otherwise it returns -EINVAL or 0 (succeeded, given address is on the entry). Link: https://lkml.kernel.org/r/161176187132.1067016.8118042342894378981.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 59158ec4aef7 ("tracing/kprobes: Check the probe on unloaded module correctly") Reported-by: Jianlin Lv Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/kprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b3a36b0cfc81..1883a4a9f16a 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -266,7 +266,7 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); extern bool arch_kprobe_on_func_entry(unsigned long offset); -extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); +extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); extern int kprobe_add_ksym_blacklist(unsigned long entry); -- cgit v1.2.3 From 4c457e8cb75eda91906a4f89fc39bde3f9a43922 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 23 Jan 2021 12:27:59 +0000 Subject: genirq/msi: Activate Multi-MSI early when MSI_FLAG_ACTIVATE_EARLY is set When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI), __msi_domain_alloc_irqs() performs the activation of the interrupt (which in the case of PCI results in the endpoint being programmed) as soon as the interrupt is allocated. But it appears that this is only done for the first vector, introducing an inconsistent behaviour for PCI Multi-MSI. Fix it by iterating over the number of vectors allocated to each MSI descriptor. This is easily achieved by introducing a new "for_each_msi_vector" iterator, together with a tiny bit of refactoring. Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early") Reported-by: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Shameer Kolothum Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210123122759.1781359-1-maz@kernel.org --- include/linux/msi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 360a0a7e7341..aef35fd1cf11 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -178,6 +178,12 @@ struct msi_desc { list_for_each_entry((desc), dev_to_msi_list((dev)), list) #define for_each_msi_entry_safe(desc, tmp, dev) \ list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) +#define for_each_msi_vector(desc, __irq, dev) \ + for_each_msi_entry((desc), (dev)) \ + if ((desc)->irq) \ + for (__irq = (desc)->irq; \ + __irq < ((desc)->irq + (desc)->nvec_used); \ + __irq++) #ifdef CONFIG_IRQ_MSI_IOMMU static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) -- cgit v1.2.3 From c8b186a8d54d7e12d28e9f9686cb00ff18fc2ab2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 2 Feb 2021 18:23:26 +1100 Subject: tracepoint: Fix race between tracing and removing tracepoint When executing a tracepoint, the tracepoint's func is dereferenced twice - in __DO_TRACE() (where the returned pointer is checked) and later on in __traceiter_##_name where the returned pointer is dereferenced without checking which leads to races against tracepoint_removal_sync() and crashes. This adds a check before referencing the pointer in tracepoint_ptr_deref. Link: https://lkml.kernel.org/r/20210202072326.120557-1-aik@ozlabs.ru Cc: stable@vger.kernel.org Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()") Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexey Kardashevskiy Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 0f21617f1a66..966ed8980327 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -307,11 +307,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ - do { \ - it_func = (it_func_ptr)->func; \ - __data = (it_func_ptr)->data; \ - ((void(*)(void *, proto))(it_func))(__data, args); \ - } while ((++it_func_ptr)->func); \ + if (it_func_ptr) { \ + do { \ + it_func = (it_func_ptr)->func; \ + __data = (it_func_ptr)->data; \ + ((void(*)(void *, proto))(it_func))(__data, args); \ + } while ((++it_func_ptr)->func); \ + } \ return 0; \ } \ DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); -- cgit v1.2.3 From 4c9fb5d9140802db4db9f66c23887f43174e113c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Feb 2021 15:54:19 +0100 Subject: iommu: Check dev->iommu in dev_iommu_priv_get() before dereferencing it The dev_iommu_priv_get() needs a similar check to dev_iommu_fwspec_get() to make sure no NULL-ptr is dereferenced. Fixes: 05a0542b456e1 ("iommu/amd: Store dev_data as device iommu private data") Cc: stable@vger.kernel.org # v5.8+ Link: https://lore.kernel.org/r/20210202145419.29143-1-joro@8bytes.org Reference: https://bugzilla.kernel.org/show_bug.cgi?id=211241 Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b3f0e2018c62..efa96263b81b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -616,7 +616,10 @@ static inline void dev_iommu_fwspec_set(struct device *dev, static inline void *dev_iommu_priv_get(struct device *dev) { - return dev->iommu->priv; + if (dev->iommu) + return dev->iommu->priv; + else + return NULL; } static inline void dev_iommu_priv_set(struct device *dev, void *priv) -- cgit v1.2.3 From 52cbd23a119c6ebf40a527e53f3402d2ea38eccb Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Feb 2021 14:29:52 -0500 Subject: udp: fix skb_copy_and_csum_datagram with odd segment sizes When iteratively computing a checksum with csum_block_add, track the offset "pos" to correctly rotate in csum_block_add when offset is odd. The open coded implementation of skb_copy_and_csum_datagram did this. With the switch to __skb_datagram_iter calling csum_and_copy_to_iter, pos was reinitialized to 0 on each call. Bring back the pos by passing it along with the csum to the callback. Changes v1->v2 - pass csum value, instead of csump pointer (Alexander Duyck) Link: https://lore.kernel.org/netdev/20210128152353.GB27281@optiplex/ Fixes: 950fcaecd5cc ("datagram: consolidate datagram copy to iter helpers") Reported-by: Oliver Graute Signed-off-by: Willem de Bruijn Reviewed-by: Alexander Duyck Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20210203192952.1849843-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/uio.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 72d88566694e..27ff8eb786dc 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -260,7 +260,13 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); + +struct csum_state { + __wsum csum; + size_t off; +}; + +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, -- cgit v1.2.3 From 585fc0d2871c9318c949fbf45b1f081edd489e96 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 4 Feb 2021 18:32:03 -0800 Subject: mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page If a new hugetlb page is allocated during fallocate it will not be marked as active (set_page_huge_active) which will result in a later isolate_huge_page failure when the page migration code would like to move that page. Such a failure would be unexpected and wrong. Only export set_page_huge_active, just leave clear_page_huge_active as static. Because there are no external users. Link: https://lkml.kernel.org/r/20210115124942.46403-3-songmuchun@bytedance.com Fixes: 70c3547e36f5 (hugetlbfs: add hugetlbfs_fallocate()) Signed-off-by: Muchun Song Acked-by: Michal Hocko Reviewed-by: Mike Kravetz Reviewed-by: Oscar Salvador Cc: David Hildenbrand Cc: Yang Shi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ebca2ef02212..b5807f23caf8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -770,6 +770,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, } #endif +void set_page_huge_active(struct page *page); + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; -- cgit v1.2.3 From 4f6ec8602341e97b364e4e0d41a1ed08148f5e98 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 4 Feb 2021 18:32:24 -0800 Subject: mm/vmalloc: separate put pages and flush VM flags When VM_MAP_PUT_PAGES was added, it was defined with the same value as VM_FLUSH_RESET_PERMS. This doesn't seem like it will cause any big functional problems other than some excess flushing for VM_MAP_PUT_PAGES allocations. Redefine VM_MAP_PUT_PAGES to have its own value. Also, rearrange things so flags are less likely to be missed in the future. Link: https://lkml.kernel.org/r/20210122233706.9304-1-rick.p.edgecombe@intel.com Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap") Signed-off-by: Rick Edgecombe Suggested-by: Matthew Wilcox Cc: Miaohe Lin Cc: Christoph Hellwig Cc: Daniel Axtens Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 80c0181c411d..cedcda6593f6 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -24,7 +24,8 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ -#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */ +#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ +#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ /* * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. @@ -37,12 +38,6 @@ struct notifier_block; /* in notifier.h */ * determine which allocations need the module shadow freed. */ -/* - * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with - * vfree_atomic(). - */ -#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ - /* bits [20..32] reserved for arch specific ioremap internals */ /* -- cgit v1.2.3 From 49c6631d3b4f61a7b5bb0453a885a12bfa06ffd8 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Thu, 4 Feb 2021 18:32:49 -0800 Subject: kasan: add explicit preconditions to kasan_report() Patch series "kasan: Fix metadata detection for KASAN_HW_TAGS", v5. With the introduction of KASAN_HW_TAGS, kasan_report() currently assumes that every location in memory has valid metadata associated. This is due to the fact that addr_has_metadata() returns always true. As a consequence of this, an invalid address (e.g. NULL pointer address) passed to kasan_report() when KASAN_HW_TAGS is enabled, leads to a kernel panic. Example below, based on arm64: BUG: KASAN: invalid-access in 0x0 Read at addr 0000000000000000 by task swapper/0/1 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 ... Call trace: mte_get_mem_tag+0x24/0x40 kasan_report+0x1a4/0x410 alsa_sound_last_init+0x8c/0xa4 do_one_initcall+0x50/0x1b0 kernel_init_freeable+0x1d4/0x23c kernel_init+0x14/0x118 ret_from_fork+0x10/0x34 Code: d65f03c0 9000f021 f9428021 b6cfff61 (d9600000) ---[ end trace 377c8bb45bdd3a1a ]--- hrtimer: interrupt took 48694256 ns note: swapper/0[1] exited with preempt_count 1 Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b SMP: stopping secondary CPUs Kernel Offset: 0x35abaf140000 from 0xffff800010000000 PHYS_OFFSET: 0x40000000 CPU features: 0x0a7e0152,61c0a030 Memory Limit: none ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- This series fixes the behavior of addr_has_metadata() that now returns true only when the address is valid. This patch (of 2): With the introduction of KASAN_HW_TAGS, kasan_report() accesses the metadata only when addr_has_metadata() succeeds. Add a comment to make sure that the preconditions to the function are explicitly clarified. Link: https://lkml.kernel.org/r/20210126134409.47894-1-vincenzo.frascino@arm.com Link: https://lkml.kernel.org/r/20210126134409.47894-2-vincenzo.frascino@arm.com Signed-off-by: Vincenzo Frascino Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Leon Romanovsky Cc: Andrey Konovalov Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: "Paul E . McKenney" Cc: Naresh Kamboju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fe1ae73ff8b5..0aea9e2a2a01 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -333,6 +333,13 @@ static inline void *kasan_reset_tag(const void *addr) return (void *)arch_kasan_reset_tag(addr); } +/** + * kasan_report - print a report about a bad memory access detected by KASAN + * @addr: address of the bad access + * @size: size of the bad access + * @is_write: whether the bad access is a write or a read + * @ip: instruction pointer for the accessibility check or the bad access itself + */ bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); -- cgit v1.2.3 From 4c7bcb51ae25f79e3733982e5d0cd8ce8640ddfc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 21 Dec 2020 19:56:47 +0100 Subject: genirq: Prevent [devm_]irq_alloc_desc from returning irq 0 Since commit a85a6c86c25b ("driver core: platform: Clarify that IRQ 0 is invalid"), having a linux-irq with number 0 will trigger a WARN() when calling platform_get_irq*() to retrieve that linux-irq. Since [devm_]irq_alloc_desc allocs a single irq and since irq 0 is not used on some systems, it can return 0, triggering that WARN(). This happens e.g. on Intel Bay Trail and Cherry Trail devices using the LPE audio engine for HDMI audio: 0 is an invalid IRQ number WARNING: CPU: 3 PID: 472 at drivers/base/platform.c:238 platform_get_irq_optional+0x108/0x180 Modules linked in: snd_hdmi_lpe_audio(+) ... Call Trace: platform_get_irq+0x17/0x30 hdmi_lpe_audio_probe+0x4a/0x6c0 [snd_hdmi_lpe_audio] ---[ end trace ceece38854223a0b ]--- Change the 'from' parameter passed to __[devm_]irq_alloc_descs() by the [devm_]irq_alloc_desc macros from 0 to 1, so that these macros will no longer return 0. Fixes: a85a6c86c25b ("driver core: platform: Clarify that IRQ 0 is invalid") Signed-off-by: Hans de Goede Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201221185647.226146-1-hdegoede@redhat.com --- include/linux/irq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 4aeb1c4c7e07..2efde6a79b7e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -928,7 +928,7 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) #define irq_alloc_desc(node) \ - irq_alloc_descs(-1, 0, 1, node) + irq_alloc_descs(-1, 1, 1, node) #define irq_alloc_desc_at(at, node) \ irq_alloc_descs(at, at, 1, node) @@ -943,7 +943,7 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, __devm_irq_alloc_descs(dev, irq, from, cnt, node, THIS_MODULE, NULL) #define devm_irq_alloc_desc(dev, node) \ - devm_irq_alloc_descs(dev, -1, 0, 1, node) + devm_irq_alloc_descs(dev, -1, 1, 1, node) #define devm_irq_alloc_desc_at(dev, at, node) \ devm_irq_alloc_descs(dev, at, at, 1, node) -- cgit v1.2.3 From 6342adcaa683c2b705c24ed201dc11b35854c88d Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 3 Feb 2021 13:00:48 -0500 Subject: entry: Ensure trap after single-step on system call return Commit 299155244770 ("entry: Drop usage of TIF flags in the generic syscall code") introduced a bug on architectures using the generic syscall entry code, in which processes stopped by PTRACE_SYSCALL do not trap on syscall return after receiving a TIF_SINGLESTEP. The reason is that the meaning of TIF_SINGLESTEP flag is overloaded to cause the trap after a system call is executed, but since the above commit, the syscall call handler only checks for the SYSCALL_WORK flags on the exit work. Split the meaning of TIF_SINGLESTEP such that it only means single-step mode, and create a new type of SYSCALL_WORK to request a trap immediately after a syscall in single-step mode. In the current implementation, the SYSCALL_WORK flag shadows the TIF_SINGLESTEP flag for simplicity. Update x86 to flip this bit when a tracer enables single stepping. Fixes: 299155244770 ("entry: Drop usage of TIF flags in the generic syscall code") Suggested-by: Linus Torvalds Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Thomas Gleixner Tested-by: Kyle Huey Link: https://lore.kernel.org/r/87h7mtc9pr.fsf_-_@collabora.com --- include/linux/entry-common.h | 1 + include/linux/thread_info.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index ca86a00abe86..a104b298019a 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -46,6 +46,7 @@ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_AUDIT | \ SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ + SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ ARCH_SYSCALL_WORK_EXIT) /* diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index c8a974cead73..9b2158c69275 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -43,6 +43,7 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SYSCALL_EMU, SYSCALL_WORK_BIT_SYSCALL_AUDIT, SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH, + SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) @@ -51,6 +52,7 @@ enum syscall_work_bit { #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) #define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) +#define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP) #endif #include -- cgit v1.2.3 From 3aa6bce9af0e25b735c9c1263739a5639a336ae8 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 5 Feb 2021 17:37:32 -0800 Subject: net: watchdog: hold device global xmit lock during tx disable Prevent netif_tx_disable() running concurrently with dev_watchdog() by taking the device global xmit lock. Otherwise, the recommended: netif_carrier_off(dev); netif_tx_disable(dev); driver shutdown sequence can happen after the watchdog has already checked carrier, resulting in possible false alarms. This is because netif_tx_lock() only sets the frozen bit without maintaining the locks on the individual queues. Fixes: c3f26a269c24 ("netdev: Fix lockdep warnings in multiqueue configurations.") Signed-off-by: Edwin Peer Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 259be67644e3..5ff27c12ce68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4352,6 +4352,7 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_disable(); cpu = smp_processor_id(); + spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -4359,6 +4360,7 @@ static inline void netif_tx_disable(struct net_device *dev) netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } + spin_unlock(&dev->tx_global_lock); local_bh_enable(); } -- cgit v1.2.3