From d6157e4f18173ad24441aa9ca04e7e9121a9b4c7 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 10 Aug 2015 16:41:45 +0200 Subject: ARM: at91: remove useless includes in platform_data/atmel.h include/linux/platform_data/atmel.h has a lot of useless includes, remove them. Signed-off-by: Alexandre Belloni Signed-off-by: Nicolas Ferre --- include/linux/platform_data/atmel.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..4d67a5e82c83 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -9,15 +9,7 @@ #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include /* * at91: 6 USARTs and one DBGU port (SAM9260) -- cgit v1.2.3 From 5cfc5220a63b1008e7198fb4f91c3ef763e46657 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 18 Sep 2015 10:14:41 +0300 Subject: ARM: pxa: Remove unused clock_enable field from struct pxa2xx_spi_master Use for struct pxa2xx_spi_master clock_enable field was removed years ago from the pxa2xx-spi driver by the commit 2f1a74e5a2de ("[ARM] pxa: make pxa2xx_spi driver use ssp_request()/ssp_free()"). Therefore remove it from structure definition, documentation and from couple affected board files. Signed-off-by: Jarkko Nikula Acked-by: Mark Brown Signed-off-by: Robert Jarzmik --- include/linux/spi/pxa2xx_spi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 6d36dacec4ba..9ec4c147abbc 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -23,7 +23,6 @@ struct dma_chan; /* device.platform_data for SSP controller devices */ struct pxa2xx_spi_master { - u32 clock_enable; u16 num_chipselect; u8 enable_dma; -- cgit v1.2.3 From a645654b817feba05e5156345325d19fc85ebc9f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 23 Aug 2015 15:18:55 +0200 Subject: vga_switcheroo: Document _ALL_ the things! This adds an "Overview" DOC section plus two DOC sections for the modes of use ("Manual switching and manual power control" and "Driver power control"). Also included is kernel-doc for all public functions, structs and enums. Signed-off-by: Lukas Wunner Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 85 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b483abd34493..fe90bfc3b510 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -1,10 +1,31 @@ /* + * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs + * * Copyright (c) 2010 Red Hat Inc. * Author : Dave Airlie * - * Licensed under GPLv2 + * Copyright (c) 2015 Lukas Wunner + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS + * IN THE SOFTWARE. * - * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs */ #ifndef _LINUX_VGA_SWITCHEROO_H_ @@ -14,6 +35,20 @@ struct pci_dev; +/** + * enum vga_switcheroo_state - client power state + * @VGA_SWITCHEROO_OFF: off + * @VGA_SWITCHEROO_ON: on + * @VGA_SWITCHEROO_INIT: client has registered with vga_switcheroo but + * vga_switcheroo is not enabled, i.e. no second client or no handler + * has registered. Only used in vga_switcheroo_get_client_state() which + * in turn is only called from hda_intel.c + * @VGA_SWITCHEROO_NOT_FOUND: client has not registered with vga_switcheroo. + * Only used in vga_switcheroo_get_client_state() which in turn is only + * called from hda_intel.c + * + * Client power state. + */ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, @@ -22,20 +57,64 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_NOT_FOUND, }; +/** + * enum vga_switcheroo_client_id - client identifier + * @VGA_SWITCHEROO_IGD: integrated graphics device + * @VGA_SWITCHEROO_DIS: discrete graphics device + * @VGA_SWITCHEROO_MAX_CLIENTS: currently no more than two GPUs are supported + * + * Client identifier. Audio clients use the same identifier & 0x100. + */ enum vga_switcheroo_client_id { VGA_SWITCHEROO_IGD, VGA_SWITCHEROO_DIS, VGA_SWITCHEROO_MAX_CLIENTS, }; +/** + * struct vga_switcheroo_handler - handler callbacks + * @init: initialize handler. + * Optional. This gets called when vga_switcheroo is enabled, i.e. when + * two vga clients have registered. It allows the handler to perform + * some delayed initialization that depends on the existence of the + * vga clients. Currently only the radeon and amdgpu drivers use this. + * The return value is ignored + * @switchto: switch outputs to given client. + * Mandatory. For muxless machines this should be a no-op. Returning 0 + * denotes success, anything else failure (in which case the switch is + * aborted) + * @power_state: cut or reinstate power of given client. + * Optional. The return value is ignored + * @get_client_id: determine if given pci device is integrated or discrete GPU. + * Mandatory + * + * Handler callbacks. The multiplexer itself. The @switchto and @get_client_id + * methods are mandatory, all others may be set to NULL. + */ struct vga_switcheroo_handler { + int (*init)(void); int (*switchto)(enum vga_switcheroo_client_id id); int (*power_state)(enum vga_switcheroo_client_id id, enum vga_switcheroo_state state); - int (*init)(void); int (*get_client_id)(struct pci_dev *pdev); }; +/** + * struct vga_switcheroo_client_ops - client callbacks + * @set_gpu_state: do the equivalent of suspend/resume for the card. + * Mandatory. This should not cut power to the discrete GPU, + * which is the job of the handler + * @reprobe: poll outputs. + * Optional. This gets called after waking the GPU and switching + * the outputs to it + * @can_switch: check if the device is in a position to switch now. + * Mandatory. The client should return false if a user space process + * has one of its device files open + * + * Client callbacks. A client can be either a GPU or an audio device on a GPU. + * The @set_gpu_state and @can_switch methods are mandatory, @reprobe may be + * set to NULL. For audio clients, the @reprobe member is bogus. + */ struct vga_switcheroo_client_ops { void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state); void (*reprobe)(struct pci_dev *dev); -- cgit v1.2.3 From 7dc87ff8815ef43717c936faea79013855e3dbef Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Sep 2015 10:54:59 +0800 Subject: ARM: imx7d: add imx7d iomux-gpr field define Add imx7d iomux-gpr field define. Signed-off-by: Fugang Duan Signed-off-by: Shawn Guo --- include/linux/mfd/syscon/imx7-iomuxc-gpr.h | 47 ++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/mfd/syscon/imx7-iomuxc-gpr.h (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h new file mode 100644 index 000000000000..4585d6105d68 --- /dev/null +++ b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_IMX7_IOMUXC_GPR_H +#define __LINUX_IMX7_IOMUXC_GPR_H + +#define IOMUXC_GPR0 0x00 +#define IOMUXC_GPR1 0x04 +#define IOMUXC_GPR2 0x08 +#define IOMUXC_GPR3 0x0c +#define IOMUXC_GPR4 0x10 +#define IOMUXC_GPR5 0x14 +#define IOMUXC_GPR6 0x18 +#define IOMUXC_GPR7 0x1c +#define IOMUXC_GPR8 0x20 +#define IOMUXC_GPR9 0x24 +#define IOMUXC_GPR10 0x28 +#define IOMUXC_GPR11 0x2c +#define IOMUXC_GPR12 0x30 +#define IOMUXC_GPR13 0x34 +#define IOMUXC_GPR14 0x38 +#define IOMUXC_GPR15 0x3c +#define IOMUXC_GPR16 0x40 +#define IOMUXC_GPR17 0x44 +#define IOMUXC_GPR18 0x48 +#define IOMUXC_GPR19 0x4c +#define IOMUXC_GPR20 0x50 +#define IOMUXC_GPR21 0x54 +#define IOMUXC_GPR22 0x58 + +/* For imx7d iomux gpr register field define */ +#define IMX7D_GPR1_IRQ_MASK (0x1 << 12) +#define IMX7D_GPR1_ENET1_TX_CLK_SEL_MASK (0x1 << 13) +#define IMX7D_GPR1_ENET2_TX_CLK_SEL_MASK (0x1 << 14) +#define IMX7D_GPR1_ENET_TX_CLK_SEL_MASK (0x3 << 13) +#define IMX7D_GPR1_ENET1_CLK_DIR_MASK (0x1 << 17) +#define IMX7D_GPR1_ENET2_CLK_DIR_MASK (0x1 << 18) +#define IMX7D_GPR1_ENET_CLK_DIR_MASK (0x3 << 17) + +#define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI (0x1 << 4) + +#endif /* __LINUX_IMX7_IOMUXC_GPR_H */ -- cgit v1.2.3 From f15a66e68422ca6bb783142780ad440067f6cc89 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 5 Sep 2015 11:22:39 +0200 Subject: drm: Spell vga_switcheroo consistently Currently everyone and their dog has their own favourite spelling for vga_switcheroo. This makes it hard to grep dmesg for log entries relating to vga_switcheroo. It also makes it hard to find related source files in the tree. vga_switcheroo.c uses pr_fmt "vga_switcheroo". Use that everywhere. Signed-off-by: Lukas Wunner Signed-off-by: Daniel Vetter --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index bc9afa74ee11..be40dbaed11e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -156,7 +156,7 @@ struct fb_cursor_user { #define FB_EVENT_GET_REQ 0x0D /* Unbind from the console if possible */ #define FB_EVENT_FB_UNBIND 0x0E -/* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */ +/* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */ #define FB_EVENT_REMAP_ALL_CONSOLE 0x0F /* A hardware display blank early change occured */ #define FB_EARLY_EVENT_BLANK 0x10 -- cgit v1.2.3 From 21b45676b7c4b79334d8fe3c5a112af0517b66e9 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 27 Aug 2015 16:43:43 +0200 Subject: vga_switcheroo: Set active attribute to false for audio clients The active attribute in struct vga_switcheroo_client denotes whether the outputs are currently switched to this client. The attribute is only meaningful for vga clients. It is never used for audio clients. The function vga_switcheroo_register_audio_client() misuses this attribute to store whether the audio device is fully initialized. Most likely there was a misunderstanding about the meaning of "active" when this was added. Comment from Takashi's review: "Not really. The full initialization of audio was meant that the audio is active indeed. Admittedly, though, the active flag for each audio client doesn't play any role because the audio always follows the gfx state changes, and the value passed there doesn't reflect the actual state due to the later change. So, I agree with the removal of the flag itself -- or let the audio active flag following the corresponding gfx flag. The latter will make the proc output more consistent while the former is certainly more reduction of code." Set the active attribute to false for audio clients. Remove the active parameter from vga_switcheroo_register_audio_client() and its sole caller, hda_intel.c:register_vga_switcheroo(). vga_switcheroo_register_audio_client() was introduced by 3e9e63dbd374 ("vga_switcheroo: Add the support for audio clients"). Its use in hda_intel.c was introduced by a82d51ed24bb ("ALSA: hda - Support VGA-switcheroo"). v1.1: The changes above imply that in find_active_client() the call to client_is_vga() is now superfluous. Drop it. Cc: Takashi Iwai Signed-off-by: Lukas Wunner [danvet: Add Takashi's clarification to the commit message.] Reviewed-by: Takashi Iwai Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index fe90bfc3b510..376499197717 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -128,7 +128,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev, bool driver_power_control); int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id, bool active); + int id); void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); @@ -154,7 +154,7 @@ static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_i static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id, bool active) { return 0; } + int id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } -- cgit v1.2.3 From 8cb7cf56c9fe5412de238465b27ef35b4d2801aa Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 30 Mar 2015 10:59:52 +0100 Subject: firmware: add support for ARM System Control and Power Interface(SCPI) protocol This patch adds support for System Control and Power Interface (SCPI) Message Protocol used between the Application Cores(AP) and the System Control Processor(SCP). The MHU peripheral provides a mechanism for inter-processor communication between SCP's M3 processor and AP. SCP offers control and management of the core/cluster power states, various power domain DVFS including the core/cluster, certain system clocks configuration, thermal sensors and many others. This protocol driver provides interface for all the client drivers using SCPI to make use of the features offered by the SCP. Signed-off-by: Sudeep Holla Reviewed-by: Jon Medhurst (Tixy) Cc: Jassi Brar Cc: Liviu Dudau Cc: Lorenzo Pieralisi --- include/linux/scpi_protocol.h | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 include/linux/scpi_protocol.h (limited to 'include/linux') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h new file mode 100644 index 000000000000..e7169cd54e19 --- /dev/null +++ b/include/linux/scpi_protocol.h @@ -0,0 +1,61 @@ +/* + * SCPI Message Protocol driver header + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include + +struct scpi_opp { + u32 freq; + u32 m_volt; +} __packed; + +struct scpi_dvfs_info { + unsigned int count; + unsigned int latency; /* in nanoseconds */ + struct scpi_opp *opps; +}; + +/** + * struct scpi_ops - represents the various operations provided + * by SCP through SCPI message protocol + * @get_version: returns the major and minor revision on the SCPI + * message protocol + * @clk_get_range: gets clock range limit(min - max in Hz) + * @clk_get_val: gets clock value(in Hz) + * @clk_set_val: sets the clock value, setting to 0 will disable the + * clock (if supported) + * @dvfs_get_idx: gets the Operating Point of the given power domain. + * OPP is an index to the list return by @dvfs_get_info + * @dvfs_set_idx: sets the Operating Point of the given power domain. + * OPP is an index to the list return by @dvfs_get_info + * @dvfs_get_info: returns the DVFS capabilities of the given power + * domain. It includes the OPP list and the latency information + */ +struct scpi_ops { + u32 (*get_version)(void); + int (*clk_get_range)(u16, unsigned long *, unsigned long *); + unsigned long (*clk_get_val)(u16); + int (*clk_set_val)(u16, unsigned long); + int (*dvfs_get_idx)(u8); + int (*dvfs_set_idx)(u8, u8); + struct scpi_dvfs_info *(*dvfs_get_info)(u8); +}; + +#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) +struct scpi_ops *get_scpi_ops(void); +#else +static inline struct scpi_ops *get_scpi_ops(void) { return NULL; } +#endif -- cgit v1.2.3 From 9290a16cf19301224556bc7bcb913c0c2a45bb9a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 21 Aug 2015 11:48:37 +0000 Subject: dmaengine: OF DMAEngine API based on CONFIG_DMA_OF instead of CONFIG_OF 5fa422c ("dmaengine: move drivers/of/dma.c -> drivers/dma/of-dma.c") moved OF base DMAEngine code to of-dma.c, then it based on CONFIG_DMA_OF. But, OF base DMAEngine API on of_dma.h still based on CONFIG_OF now. So, current kernel can't find OF base DMAEngine API if .config has CONFIG_OF, but not have CONFIG_DMA_OF. This patch tidyup it. Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 98ba7525929e..36112cdd665a 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -34,7 +34,7 @@ struct of_dma_filter_info { dma_filter_fn filter_fn; }; -#ifdef CONFIG_OF +#ifdef CONFIG_DMA_OF extern int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) (struct of_phandle_args *, struct of_dma *), -- cgit v1.2.3 From 068654c200cc32966ce7906ca0bd096b9b97e988 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 26 May 2015 16:49:01 +0100 Subject: drivers: firmware: psci: move power_state handling to generic code Functions implemented on arm64 to check if a power_state parameter is valid and if the power_state implies context loss are not arm64 specific and should be moved to generic code so that they can be reused on arm systems too. This patch moves the functions handling the power_state parameter to generic PSCI firmware layer code. Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Sudeep Holla Tested-by: Jisheng Zhang Cc: Catalin Marinas Cc: Mark Rutland --- include/linux/psci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index a682fcc91c33..12c4865457ad 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -21,6 +21,8 @@ #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 bool psci_tos_resident_on(int cpu); +bool psci_power_state_loses_context(u32 state); +bool psci_power_state_is_valid(u32 state); struct psci_operations { int (*cpu_suspend)(u32 state, unsigned long entry_point); -- cgit v1.2.3 From 7d8d05d11473a169ab4d53bc7fc23d1fe3f1959f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 16 Aug 2015 11:23:46 +0200 Subject: misc: atmel_tclib: get and use slow clock Commit dca1a4b5ff6e ("clk: at91: keep slow clk enabled to prevent system hang") added a workaround for the slow clock as it is not properly handled by its users. Get and use the slow clock as it is necessary for the timer counters. Signed-off-by: Boris Brezillon Signed-off-by: Alexandre Belloni Acked-by: Greg Kroah-Hartman Acked-by: Daniel Lezcano Acked-by: Thierry Reding --- include/linux/atmel_tc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index b87c1c7c242a..468fdfa643f0 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -67,6 +67,7 @@ struct atmel_tc { const struct atmel_tcb_config *tcb_config; int irq[3]; struct clk *clk[3]; + struct clk *slow_clk; struct list_head node; bool allocated; }; -- cgit v1.2.3 From 7f5028cf6190407b7a632b0f30b83187577824cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20L=C3=B3pez?= Date: Mon, 21 Sep 2015 10:38:20 -0300 Subject: sysfs: Support is_visible() on binary attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the sysfs header file: "The returned value will replace static permissions defined in struct attribute or struct bin_attribute." but this isn't the case, as is_visible is only called on struct attribute only. This patch introduces a new is_bin_visible() function to implement the same functionality for binary attributes, and updates documentation accordingly. Note that to keep functionality and code similar to that of normal attributes, the mode is now checked as well to ensure it contains only read/write permissions or SYSFS_PREALLOC. Reviewed-by: Guenter Roeck Signed-off-by: Emilio López Acked-by: Greg Kroah-Hartman Signed-off-by: Olof Johansson --- include/linux/sysfs.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9f65758311a4..2f66050d073b 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -64,10 +64,18 @@ do { \ * a new subdirectory with this name. * @is_visible: Optional: Function to return permissions associated with an * attribute of the group. Will be called repeatedly for each - * attribute in the group. Only read/write permissions as well as - * SYSFS_PREALLOC are accepted. Must return 0 if an attribute is - * not visible. The returned value will replace static permissions - * defined in struct attribute or struct bin_attribute. + * non-binary attribute in the group. Only read/write + * permissions as well as SYSFS_PREALLOC are accepted. Must + * return 0 if an attribute is not visible. The returned value + * will replace static permissions defined in struct attribute. + * @is_bin_visible: + * Optional: Function to return permissions associated with a + * binary attribute of the group. Will be called repeatedly + * for each binary attribute in the group. Only read/write + * permissions as well as SYSFS_PREALLOC are accepted. Must + * return 0 if a binary attribute is not visible. The returned + * value will replace static permissions defined in + * struct bin_attribute. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -76,6 +84,8 @@ struct attribute_group { const char *name; umode_t (*is_visible)(struct kobject *, struct attribute *, int); + umode_t (*is_bin_visible)(struct kobject *, + struct bin_attribute *, int); struct attribute **attrs; struct bin_attribute **bin_attrs; }; -- cgit v1.2.3 From 18800fc7a04e7df8a345e7ef4fc3064368276f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20L=C3=B3pez?= Date: Mon, 21 Sep 2015 10:38:22 -0300 Subject: platform/chrome: Support reading/writing the vboot context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some EC implementations include a small nvram space used to store verified boot context data. This patch offers a way to expose this data to userspace. Reviewed-by: Javier Martinez Canillas Signed-off-by: Emilio López Signed-off-by: Olof Johansson --- include/linux/mfd/cros_ec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index da72671a42fa..494682ce4bf3 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -255,5 +255,6 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; +extern struct attribute_group cros_ec_vbc_attr_group; #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit v1.2.3 From edc1b01cd3b20a5fff049e98f82a2b0d24a34c89 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Oct 2015 10:53:49 -0400 Subject: SUNRPC: Move TCP receive data path into a workqueue context Stream protocols such as TCP can often build up a backlog of data to be read due to ordering. Combine this with the fact that some workloads such as NFS read()-intensive workloads need to receive a lot of data per RPC call, and it turns out that receiving the data from inside a softirq context can cause starvation. The following patch moves the TCP data receive into a workqueue context. We still end up calling tcp_read_sock(), but we do so from a process context, meaning that softirqs are enabled for most of the time. With this patch, I see a doubling of read bandwidth when running a multi-threaded iozone workload between a virtual client and server setup. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 357e44c1a46b..0ece4ba06f06 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -44,6 +44,8 @@ struct sock_xprt { */ unsigned long sock_state; struct delayed_work connect_worker; + struct work_struct recv_worker; + struct mutex recv_mutex; struct sockaddr_storage srcaddr; unsigned short srcport; -- cgit v1.2.3 From 516285ebe0efadc40b914a0e61a913a390604810 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 16:15:24 -0400 Subject: NFSv4: nfs4_async_handle_error should take a non-const nfs_server For symmetry with the synchronous handler, and so that we can potentially handle errors such as NFS4ERR_BADNAME. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..53f2acc68baf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -528,7 +528,7 @@ struct nfs4_delegreturnargs { struct nfs4_delegreturnres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - const struct nfs_server *server; + struct nfs_server *server; }; /* @@ -601,7 +601,7 @@ struct nfs_removeargs { struct nfs_removeres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs_fattr *dir_attr; struct nfs4_change_info cinfo; }; @@ -619,7 +619,7 @@ struct nfs_renameargs { struct nfs_renameres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs4_change_info old_cinfo; struct nfs_fattr *old_fattr; struct nfs4_change_info new_cinfo; -- cgit v1.2.3 From 38a1bdc9ff9f6c8cfad228eac5c1ce31ce038b25 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 19 Jun 2015 15:31:46 +0100 Subject: firmware: arm_scpi: Extend to support sensors ARM System Control Processor (SCP) provides an API to query and use the sensors available in the system. Extend the SCPI driver to support sensor messages. Signed-off-by: Punit Agrawal Acked-by: Sudeep Holla --- include/linux/scpi_protocol.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index e7169cd54e19..80af3cd35ae4 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -28,6 +28,20 @@ struct scpi_dvfs_info { struct scpi_opp *opps; }; +enum scpi_sensor_class { + TEMPERATURE, + VOLTAGE, + CURRENT, + POWER, +}; + +struct scpi_sensor_info { + u16 sensor_id; + u8 class; + u8 trigger_type; + char name[20]; +} __packed; + /** * struct scpi_ops - represents the various operations provided * by SCP through SCPI message protocol @@ -52,6 +66,9 @@ struct scpi_ops { int (*dvfs_get_idx)(u8); int (*dvfs_set_idx)(u8, u8); struct scpi_dvfs_info *(*dvfs_get_info)(u8); + int (*sensor_get_capability)(u16 *sensors); + int (*sensor_get_info)(u16 sensor_id, struct scpi_sensor_info *); + int (*sensor_get_value)(u16, u32 *); }; #if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) -- cgit v1.2.3 From 9a764234eee689ea800424ab99b08ff07a8bdbcd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 14 Sep 2015 12:09:34 -0700 Subject: soc: brcmstb: Add Bus Interface Unit control setup Broadcom STB SoCs (brcmstb) require an early setup of their Bus Interface Unit control register, this needs to happen before SMP is brought up because it affects how the CPU complex will be interfaced to the memory controller. Add support code which properly initializes the BIU registers based on whether "brcm,write-pairing" is present in Device Tree, and take care of saving and restoring credit register settings during system-wide suspend/resume operations. Acked-by: Gregory Fong Signed-off-by: Florian Fainelli --- include/linux/soc/brcmstb/brcmstb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/soc/brcmstb/brcmstb.h (limited to 'include/linux') diff --git a/include/linux/soc/brcmstb/brcmstb.h b/include/linux/soc/brcmstb/brcmstb.h new file mode 100644 index 000000000000..337ce414e898 --- /dev/null +++ b/include/linux/soc/brcmstb/brcmstb.h @@ -0,0 +1,10 @@ +#ifndef __BRCMSTB_SOC_H +#define __BRCMSTB_SOC_H + +/* + * Bus Interface Unit control register setup, must happen early during boot, + * before SMP is brought up, called by machine entry point. + */ +void brcmstb_biuctrl_init(void); + +#endif /* __BRCMSTB_SOC_H */ -- cgit v1.2.3 From a639315d6c536c806724c9328941a2517507e3e3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Sep 2015 02:14:03 -0400 Subject: pmem: kill memremap_pmem() Now that the pmem-api is defined as "a set of apis that enables access to WB mapped pmem", the mapping type is implied. Remove the wrapper and push the functionality down into the pmem driver in preparation for adding support for direct-mapped pmem. Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/pmem.h | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 85f810b33917..acfea8ce4a07 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -65,11 +65,6 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si memcpy(dst, (void __force const *) src, size); } -static inline void memunmap_pmem(struct device *dev, void __pmem *addr) -{ - devm_memunmap(dev, (void __force *) addr); -} - static inline bool arch_has_pmem_api(void) { return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); @@ -93,7 +88,7 @@ static inline bool arch_has_wmb_pmem(void) * These defaults seek to offer decent performance and minimize the * window between i/o completion and writes being durable on media. * However, it is undefined / architecture specific whether - * default_memremap_pmem + default_memcpy_to_pmem is sufficient for + * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for * making data durable relative to i/o completion. */ static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, @@ -116,25 +111,6 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size) memset((void __force *)addr, 0, size); } -/** - * memremap_pmem - map physical persistent memory for pmem api - * @offset: physical address of persistent memory - * @size: size of the mapping - * - * Establish a mapping of the architecture specific memory type expected - * by memcpy_to_pmem() and wmb_pmem(). For example, it may be - * the case that an uncacheable or writethrough mapping is sufficient, - * or a writeback mapping provided memcpy_to_pmem() and - * wmb_pmem() arrange for the data to be written through the - * cache to persistent media. - */ -static inline void __pmem *memremap_pmem(struct device *dev, - resource_size_t offset, unsigned long size) -{ - return (void __pmem *) devm_memremap(dev, offset, size, - ARCH_MEMREMAP_PMEM); -} - /** * memcpy_to_pmem - copy data to persistent memory * @dst: destination buffer for the copy -- cgit v1.2.3 From 7c683941f30a977c10ec6be174ec5f16939c7ce5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2015 20:35:55 -0400 Subject: devm: make allocations numa aware by default Given we already have a device just use dev_to_node() to provide hint allocations for devres. However, current devres_alloc() users will need to explicitly opt-in with devres_alloc_node(). Reviewed-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/device.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 5d7bc6349930..b8f411b57dcb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -604,13 +604,21 @@ typedef void (*dr_release_t)(struct device *dev, void *res); typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); #ifdef CONFIG_DEBUG_DEVRES -extern void *__devres_alloc(dr_release_t release, size_t size, gfp_t gfp, - const char *name); +extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid, const char *name); #define devres_alloc(release, size, gfp) \ - __devres_alloc(release, size, gfp, #release) + __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) +#define devres_alloc_node(release, size, gfp, nid) \ + __devres_alloc_node(release, size, gfp, nid, #release) #else -extern void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp); +extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid); +static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp) +{ + return devres_alloc_node(release, size, gfp, NUMA_NO_NODE); +} #endif + extern void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), -- cgit v1.2.3 From e866a2e3950fe2f708d5cc67d641b1725ef7a708 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 1 Oct 2015 23:45:31 +0200 Subject: linux/thermal.h: rename KELVIN_TO_CELSIUS to DECI_KELVIN_TO_CELSIUS The macros KELVIN_TO_CELSIUS and CELSIUS_TO_KELVIN actually convert between deciKelvins and Celsius, so rename them to reflect that. While at it, use a statement expression in DECI_KELVIN_TO_CELSIUS to prevent expanding the argument multiple times and get rid of a few casts. Signed-off-by: Rasmus Villemoes Acked-by: Darren Hart Signed-off-by: Zhang Rui --- include/linux/thermal.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 157d366e761b..4014a59828fc 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -44,9 +44,11 @@ #define THERMAL_WEIGHT_DEFAULT 0 /* Unit conversion macros */ -#define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ - ((long)t-2732+5)/10 : ((long)t-2732-5)/10) -#define CELSIUS_TO_KELVIN(t) ((t)*10+2732) +#define DECI_KELVIN_TO_CELSIUS(t) ({ \ + long _t = (t); \ + ((_t-2732 >= 0) ? (_t-2732+5)/10 : (_t-2732-5)/10); \ +}) +#define CELSIUS_TO_DECI_KELVIN(t) ((t)*10+2732) #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100) #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732) #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off)) -- cgit v1.2.3 From 0ad95472bf169a3501991f8f33f5147f792a8116 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 23 Sep 2015 15:49:29 +0300 Subject: lockd: create NSM handles per net namespace Commit cb7323fffa85 ("lockd: create and use per-net NSM RPC clients on MON/UNMON requests") introduced per-net NSM RPC clients. Unfortunately this doesn't make any sense without per-net nsm_handle. E.g. the following scenario could happen Two hosts (X and Y) in different namespaces (A and B) share the same nsm struct. 1. nsm_monitor(host_X) called => NSM rpc client created, nsm->sm_monitored bit set. 2. nsm_mointor(host-Y) called => nsm->sm_monitored already set, we just exit. Thus in namespace B ln->nsm_clnt == NULL. 3. host X destroyed => nsm->sm_count decremented to 1 4. host Y destroyed => nsm_unmonitor() => nsm_mon_unmon() => NULL-ptr dereference of *ln->nsm_clnt So this could be fixed by making per-net nsm_handles list, instead of global. Thus different net namespaces will not be able share the same nsm_handle. Signed-off-by: Andrey Ryabinin Cc: Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ff82a32871b5..fd3b65bf51b5 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -235,7 +235,8 @@ void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_shutdown_hosts(void); void nlm_shutdown_hosts_net(struct net *net); -void nlm_host_rebooted(const struct nlm_reboot *); +void nlm_host_rebooted(const struct net *net, + const struct nlm_reboot *); /* * Host monitoring @@ -243,11 +244,13 @@ void nlm_host_rebooted(const struct nlm_reboot *); int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len); -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); /* -- cgit v1.2.3 From 870823e629ea194e6cf8e82a9694ac62cad49512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:37 +0200 Subject: configfs: add show and store methods to struct configfs_attribute Add methods to struct configfs_attribute to directly show and store attributes without adding boilerplate code to every user. In addition to the methods this also adds 3 helper macros to define read/write, read-only and write-only attributes with a single line of code. Signed-off-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Acked-by: Greg Kroah-Hartman Signed-off-by: Nicholas Bellinger --- include/linux/configfs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 63a36e89d0eb..85e9956a86de 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -125,8 +125,35 @@ struct configfs_attribute { const char *ca_name; struct module *ca_owner; umode_t ca_mode; + ssize_t (*show)(struct config_item *, char *); + ssize_t (*store)(struct config_item *, const char *, size_t); }; +#define CONFIGFS_ATTR(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO | S_IWUSR, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ + .store = _pfx##_name##_store, \ +} + +#define CONFIGFS_ATTR_RO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ +} + +#define CONFIGFS_ATTR_WO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IWUSR, \ + .ca_owner = THIS_MODULE, \ + .store = _pfx##_name##_store, \ +} + /* * Users often need to create attribute structures for their configurable * attributes, containing a configfs_attribute member and function pointers -- cgit v1.2.3 From 45b6a73f62ebcf3ff067895fb8030e67f4c7b67f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:38 +0200 Subject: usb-gadget: use per-attribute show and store methods To simplify the configfs interface and remove boilerplate code that also causes binary bloat. Signed-off-by: Christoph Hellwig Reviewed-by: Andrzej Pietrasiewicz Acked-by: Felipe Balbi Signed-off-by: Nicholas Bellinger --- include/linux/usb/gadget_configfs.h | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index d74c0ae989d5..c36e95730de1 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -7,9 +7,10 @@ int check_user_usb_string(const char *name, struct usb_gadget_strings *stringtab_dev); #define GS_STRINGS_W(__struct, __name) \ - static ssize_t __struct##_##__name##_store(struct __struct *gs, \ +static ssize_t __struct##_##__name##_store(struct config_item *item, \ const char *page, size_t len) \ { \ + struct __struct *gs = to_##__struct(item); \ int ret; \ \ ret = usb_string_copy(page, &gs->__name); \ @@ -19,30 +20,20 @@ int check_user_usb_string(const char *name, } #define GS_STRINGS_R(__struct, __name) \ - static ssize_t __struct##_##__name##_show(struct __struct *gs, \ - char *page) \ +static ssize_t __struct##_##__name##_show(struct config_item *item, char *page) \ { \ + struct __struct *gs = to_##__struct(item); \ return sprintf(page, "%s\n", gs->__name ?: ""); \ } -#define GS_STRING_ITEM_ATTR(struct_name, name) \ - static struct struct_name##_attribute struct_name##_##name = \ - __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ - struct_name##_##name##_show, \ - struct_name##_##name##_store) - #define GS_STRINGS_RW(struct_name, _name) \ GS_STRINGS_R(struct_name, _name) \ GS_STRINGS_W(struct_name, _name) \ - GS_STRING_ITEM_ATTR(struct_name, _name) + CONFIGFS_ATTR(struct_name##_, _name) #define USB_CONFIG_STRING_RW_OPS(struct_in) \ - CONFIGFS_ATTR_OPS(struct_in); \ - \ static struct configfs_item_operations struct_in##_langid_item_ops = { \ .release = struct_in##_attr_release, \ - .show_attribute = struct_in##_attr_show, \ - .store_attribute = struct_in##_attr_store, \ }; \ \ static struct config_item_type struct_in##_langid_type = { \ -- cgit v1.2.3 From 517982229f78b2aebf00a8a337e84e8eeea70b8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:59 +0200 Subject: configfs: remove old API Remove the old show_attribute and store_attribute methods and update the documentation. Also replace the two C samples with a single new one in the proper samples directory where people expect to find it. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- include/linux/configfs.h | 82 ------------------------------------------------ 1 file changed, 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 85e9956a86de..a8a335b7fce0 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -154,86 +154,6 @@ static struct configfs_attribute _pfx##attr_##_name = { \ .store = _pfx##_name##_store, \ } -/* - * Users often need to create attribute structures for their configurable - * attributes, containing a configfs_attribute member and function pointers - * for the show() and store() operations on that attribute. If they don't - * need anything else on the extended attribute structure, they can use - * this macro to define it The argument _item is the name of the - * config_item structure. - */ -#define CONFIGFS_ATTR_STRUCT(_item) \ -struct _item##_attribute { \ - struct configfs_attribute attr; \ - ssize_t (*show)(struct _item *, char *); \ - ssize_t (*store)(struct _item *, const char *, size_t); \ -} - -/* - * With the extended attribute structure, users can use this macro - * (similar to sysfs' __ATTR) to make defining attributes easier. - * An example: - * #define MYITEM_ATTR(_name, _mode, _show, _store) \ - * struct myitem_attribute childless_attr_##_name = \ - * __CONFIGFS_ATTR(_name, _mode, _show, _store) - */ -#define __CONFIGFS_ATTR(_name, _mode, _show, _store) \ -{ \ - .attr = { \ - .ca_name = __stringify(_name), \ - .ca_mode = _mode, \ - .ca_owner = THIS_MODULE, \ - }, \ - .show = _show, \ - .store = _store, \ -} -/* Here is a readonly version, only requiring a show() operation */ -#define __CONFIGFS_ATTR_RO(_name, _show) \ -{ \ - .attr = { \ - .ca_name = __stringify(_name), \ - .ca_mode = 0444, \ - .ca_owner = THIS_MODULE, \ - }, \ - .show = _show, \ -} - -/* - * With these extended attributes, the simple show_attribute() and - * store_attribute() operations need to call the show() and store() of the - * attributes. This is a common pattern, so we provide a macro to define - * them. The argument _item is the name of the config_item structure. - * This macro expects the attributes to be named "struct _attribute" - * and the function to_() to exist; - */ -#define CONFIGFS_ATTR_OPS(_item) \ -static ssize_t _item##_attr_show(struct config_item *item, \ - struct configfs_attribute *attr, \ - char *page) \ -{ \ - struct _item *_item = to_##_item(item); \ - struct _item##_attribute *_item##_attr = \ - container_of(attr, struct _item##_attribute, attr); \ - ssize_t ret = 0; \ - \ - if (_item##_attr->show) \ - ret = _item##_attr->show(_item, page); \ - return ret; \ -} \ -static ssize_t _item##_attr_store(struct config_item *item, \ - struct configfs_attribute *attr, \ - const char *page, size_t count) \ -{ \ - struct _item *_item = to_##_item(item); \ - struct _item##_attribute *_item##_attr = \ - container_of(attr, struct _item##_attribute, attr); \ - ssize_t ret = -EINVAL; \ - \ - if (_item##_attr->store) \ - ret = _item##_attr->store(_item, page, count); \ - return ret; \ -} - /* * If allow_link() exists, the item can symlink(2) out to other * items. If the item is a group, it may support mkdir(2). @@ -250,8 +170,6 @@ static ssize_t _item##_attr_store(struct config_item *item, \ */ struct configfs_item_operations { void (*release)(struct config_item *); - ssize_t (*show_attribute)(struct config_item *, struct configfs_attribute *,char *); - ssize_t (*store_attribute)(struct config_item *,struct configfs_attribute *,const char *, size_t); int (*allow_link)(struct config_item *src, struct config_item *target); int (*drop_link)(struct config_item *src, struct config_item *target); }; -- cgit v1.2.3 From d28c2b36d6027702585ca93773b3edd6e5f1a5bd Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:44 +0300 Subject: ARM: common: edma: Remove unused functions We no longer have users for these functions so they can be removed. Remove also unused enums from the header file. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index bdb2710e2aab..c1862423b356 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -72,20 +72,6 @@ struct edmacc_param { #define EDMA_DMA_TC1_ERROR 3 #define EDMA_DMA_TC2_ERROR 4 -enum address_mode { - INCR = 0, - FIFO = 1 -}; - -enum fifo_width { - W8BIT = 0, - W16BIT = 1, - W32BIT = 2, - W64BIT = 3, - W128BIT = 4, - W256BIT = 5 -}; - enum dma_event_q { EVENTQ_0 = 0, EVENTQ_1 = 1, @@ -94,11 +80,6 @@ enum dma_event_q { EVENTQ_DEFAULT = -1 }; -enum sync_dimension { - ASYNC = 0, - ABSYNC = 1 -}; - #define EDMA_CTLR_CHAN(ctlr, chan) (((ctlr) << 16) | (chan)) #define EDMA_CTLR(i) ((i) >> 16) #define EDMA_CHAN_SLOT(i) ((i) & 0xffff) @@ -121,22 +102,9 @@ void edma_free_channel(unsigned channel); int edma_alloc_slot(unsigned ctlr, int slot); void edma_free_slot(unsigned slot); -/* alloc/free a set of contiguous parameter RAM slots */ -int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count); -int edma_free_cont_slots(unsigned slot, int count); - /* calls that operate on part of a parameter RAM slot */ -void edma_set_src(unsigned slot, dma_addr_t src_port, - enum address_mode mode, enum fifo_width); -void edma_set_dest(unsigned slot, dma_addr_t dest_port, - enum address_mode mode, enum fifo_width); dma_addr_t edma_get_position(unsigned slot, bool dst); -void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx); -void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx); -void edma_set_transfer_params(unsigned slot, u16 acnt, u16 bcnt, u16 ccnt, - u16 bcnt_rld, enum sync_dimension sync_mode); void edma_link(unsigned from, unsigned to); -void edma_unlink(unsigned from); /* calls that operate on an entire parameter RAM slot */ void edma_write_slot(unsigned slot, const struct edmacc_param *params); @@ -146,7 +114,6 @@ void edma_read_slot(unsigned slot, struct edmacc_param *params); int edma_start(unsigned channel); void edma_stop(unsigned channel); void edma_clean_channel(unsigned channel); -void edma_clear_event(unsigned channel); void edma_pause(unsigned channel); void edma_resume(unsigned channel); -- cgit v1.2.3 From ca304fa9bb762f091e851d48de43f623c975d47a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:49 +0300 Subject: ARM/dmaengine: edma: Public API to use private struct pointer Instead of relying on indexes pointing to edma private date in the global pointer array, pass the private data pointer via the public API. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index c1862423b356..466021c03169 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -92,32 +92,40 @@ enum dma_event_q { #define EDMA_MAX_CC 2 +struct edma; + +struct edma *edma_get_data(struct device *edma_dev); + /* alloc/free DMA channels and their dedicated parameter RAM slots */ -int edma_alloc_channel(int channel, +int edma_alloc_channel(struct edma *cc, int channel, void (*callback)(unsigned channel, u16 ch_status, void *data), void *data, enum dma_event_q); -void edma_free_channel(unsigned channel); +void edma_free_channel(struct edma *cc, unsigned channel); /* alloc/free parameter RAM slots */ -int edma_alloc_slot(unsigned ctlr, int slot); -void edma_free_slot(unsigned slot); +int edma_alloc_slot(struct edma *cc, int slot); +void edma_free_slot(struct edma *cc, unsigned slot); /* calls that operate on part of a parameter RAM slot */ -dma_addr_t edma_get_position(unsigned slot, bool dst); -void edma_link(unsigned from, unsigned to); +dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst); +void edma_link(struct edma *cc, unsigned from, unsigned to); /* calls that operate on an entire parameter RAM slot */ -void edma_write_slot(unsigned slot, const struct edmacc_param *params); -void edma_read_slot(unsigned slot, struct edmacc_param *params); +void edma_write_slot(struct edma *cc, unsigned slot, + const struct edmacc_param *params); +void edma_read_slot(struct edma *cc, unsigned slot, + struct edmacc_param *params); /* channel control operations */ -int edma_start(unsigned channel); -void edma_stop(unsigned channel); -void edma_clean_channel(unsigned channel); -void edma_pause(unsigned channel); -void edma_resume(unsigned channel); +int edma_start(struct edma *cc, unsigned channel); +void edma_stop(struct edma *cc, unsigned channel); +void edma_clean_channel(struct edma *cc, unsigned channel); +void edma_pause(struct edma *cc, unsigned channel); +void edma_resume(struct edma *cc, unsigned channel); +int edma_trigger_channel(struct edma *cc, unsigned channel); -void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no); +void edma_assign_channel_eventq(struct edma *cc, unsigned channel, + enum dma_event_q eventq_no); struct edma_rsv_info { @@ -141,6 +149,4 @@ struct edma_soc_info { const s16 (*xbar_chans)[2]; }; -int edma_trigger_channel(unsigned); - #endif -- cgit v1.2.3 From 2b6b3b7420190888793c49e97276e1e73bd7eaed Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:53 +0300 Subject: ARM/dmaengine: edma: Merge the two drivers under drivers/dma/ Move the code out from arch/arm/common and merge it inside of the dmaengine driver. This change is done with as minimal (if eny) functional change to the code as possible to avoid introducing regression. Signed-off-by: Peter Ujfalusi Acked-by: Tony Lindgren Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 74 -------------------------------------- 1 file changed, 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index 466021c03169..6b9d500956e4 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -41,37 +41,6 @@ #ifndef EDMA_H_ #define EDMA_H_ -/* PaRAM slots are laid out like this */ -struct edmacc_param { - u32 opt; - u32 src; - u32 a_b_cnt; - u32 dst; - u32 src_dst_bidx; - u32 link_bcntrld; - u32 src_dst_cidx; - u32 ccnt; -} __packed; - -/* fields in edmacc_param.opt */ -#define SAM BIT(0) -#define DAM BIT(1) -#define SYNCDIM BIT(2) -#define STATIC BIT(3) -#define EDMA_FWID (0x07 << 8) -#define TCCMODE BIT(11) -#define EDMA_TCC(t) ((t) << 12) -#define TCINTEN BIT(20) -#define ITCINTEN BIT(21) -#define TCCHEN BIT(22) -#define ITCCHEN BIT(23) - -/*ch_status paramater of callback function possible values*/ -#define EDMA_DMA_COMPLETE 1 -#define EDMA_DMA_CC_ERROR 2 -#define EDMA_DMA_TC1_ERROR 3 -#define EDMA_DMA_TC2_ERROR 4 - enum dma_event_q { EVENTQ_0 = 0, EVENTQ_1 = 1, @@ -84,49 +53,6 @@ enum dma_event_q { #define EDMA_CTLR(i) ((i) >> 16) #define EDMA_CHAN_SLOT(i) ((i) & 0xffff) -#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */ -#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */ -#define EDMA_CONT_PARAMS_ANY 1001 -#define EDMA_CONT_PARAMS_FIXED_EXACT 1002 -#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003 - -#define EDMA_MAX_CC 2 - -struct edma; - -struct edma *edma_get_data(struct device *edma_dev); - -/* alloc/free DMA channels and their dedicated parameter RAM slots */ -int edma_alloc_channel(struct edma *cc, int channel, - void (*callback)(unsigned channel, u16 ch_status, void *data), - void *data, enum dma_event_q); -void edma_free_channel(struct edma *cc, unsigned channel); - -/* alloc/free parameter RAM slots */ -int edma_alloc_slot(struct edma *cc, int slot); -void edma_free_slot(struct edma *cc, unsigned slot); - -/* calls that operate on part of a parameter RAM slot */ -dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst); -void edma_link(struct edma *cc, unsigned from, unsigned to); - -/* calls that operate on an entire parameter RAM slot */ -void edma_write_slot(struct edma *cc, unsigned slot, - const struct edmacc_param *params); -void edma_read_slot(struct edma *cc, unsigned slot, - struct edmacc_param *params); - -/* channel control operations */ -int edma_start(struct edma *cc, unsigned channel); -void edma_stop(struct edma *cc, unsigned channel); -void edma_clean_channel(struct edma *cc, unsigned channel); -void edma_pause(struct edma *cc, unsigned channel); -void edma_resume(struct edma *cc, unsigned channel); -int edma_trigger_channel(struct edma *cc, unsigned channel); - -void edma_assign_channel_eventq(struct edma *cc, unsigned channel, - enum dma_event_q eventq_no); - struct edma_rsv_info { const s16 (*rsv_chans)[2]; -- cgit v1.2.3 From 1a7caca20ed56a80cea045327deaeb4e4379cbd1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 28 Aug 2015 10:39:20 -0700 Subject: soc: qcom: smd: Implement id_table driver matching Implement a id_table based driver maching mechanism for drivers that binds to fixed channels and doesn't need any additional configuration, e.g. IPCRTR and DIAG. Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index d7e50aa6a4ac..d0cb6d189a0a 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -8,6 +8,14 @@ struct qcom_smd; struct qcom_smd_channel; struct qcom_smd_lookup; +/** + * struct qcom_smd_id - struct used for matching a smd device + * @name: name of the channel + */ +struct qcom_smd_id { + char name[20]; +}; + /** * struct qcom_smd_device - smd device struct * @dev: the device struct @@ -21,6 +29,7 @@ struct qcom_smd_device { /** * struct qcom_smd_driver - smd driver struct * @driver: underlying device driver + * @smd_match_table: static channel match table * @probe: invoked when the smd channel is found * @remove: invoked when the smd channel is closed * @callback: invoked when an inbound message is received on the channel, @@ -29,6 +38,8 @@ struct qcom_smd_device { */ struct qcom_smd_driver { struct device_driver driver; + const struct qcom_smd_id *smd_match_table; + int (*probe)(struct qcom_smd_device *dev); void (*remove)(struct qcom_smd_device *dev); int (*callback)(struct qcom_smd_device *, const void *, size_t); -- cgit v1.2.3 From 1a03964dec3cecb6382d172b9dfe318735c2cad7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 2 Sep 2015 15:46:44 -0700 Subject: soc: qcom: Make qcom_smem_get() return a pointer Passing a void ** almost always requires a cast at the call site. Instead of littering the code with casts every time this function is called, have qcom_smem_get() return a void pointer to the location of the smem item. This frees the caller from having to cast the pointer. Cc: Bjorn Andersson Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index bc9630d3aced..785e196ee2ca 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -4,7 +4,7 @@ #define QCOM_SMEM_HOST_ANY -1 int qcom_smem_alloc(unsigned host, unsigned item, size_t size); -int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size); +void *qcom_smem_get(unsigned host, unsigned item, size_t *size); int qcom_smem_get_free_space(unsigned host); -- cgit v1.2.3 From 2d3c277ca5b1a9c12cde1f760ff925b87608bc76 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 29 Sep 2015 15:48:55 -0400 Subject: qcom-scm: add missing prototype for qcom_scm_is_available() Signed-off-by: Rob Clark Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 6e7d5ec65838..9e12000914b3 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -23,6 +23,8 @@ struct qcom_scm_hdcp_req { u32 val; }; +extern bool qcom_scm_is_available(void); + extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); -- cgit v1.2.3 From 36022770de6cf9a403c40a68712ed2d2ea2746be Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:34 +0800 Subject: nfs42: add CLONE xdr functions xdr definitions per draft-ietf-nfsv4-minorversion2-38.txt Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 00121f298269..c0c695b634d0 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -130,6 +130,7 @@ enum nfs_opnum4 { OP_READ_PLUS = 68, OP_SEEK = 69, OP_WRITE_SAME = 70, + OP_CLONE = 71, OP_ILLEGAL = 10044, }; @@ -501,6 +502,7 @@ enum { NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, + NFSPROC4_CLNT_CLONE, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..ac678b7a65ed 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -359,6 +359,25 @@ struct nfs42_layoutstat_data { struct nfs42_layoutstat_res res; }; +struct nfs42_clone_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *src_fh; + struct nfs_fh *dst_fh; + nfs4_stateid src_stateid; + nfs4_stateid dst_stateid; + __u64 src_offset; + __u64 dst_offset; + __u64 count; + const u32 *dst_bitmask; +}; + +struct nfs42_clone_res { + struct nfs4_sequence_res seq_res; + unsigned int rpc_status; + struct nfs_fattr *dst_fattr; + const struct nfs_server *server; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; -- cgit v1.2.3 From e5341f3a5762d17be9cdd06257c02c0098bdcab8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:35 +0800 Subject: nfs42: add CLONE proc functions Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 570a7df2775b..a50de1002b20 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -243,5 +243,6 @@ struct nfs_server { #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) +#define NFS_CAP_CLONE (1U << 23) #endif -- cgit v1.2.3 From 2a92ee92d4545448066fb664674c0ae5a9d5ea99 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:37 +0800 Subject: nfs: get clone_blksize when probing fsinfo NFSv42 CLONE operation is supposed to respect it. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c0c695b634d0..e7e78537aea2 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -422,6 +422,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) +#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) /* MDS threshold bitmap bits */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a50de1002b20..2469ab0bb3a1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -147,6 +147,7 @@ struct nfs_server { unsigned int acdirmax; unsigned int namelen; unsigned int options; /* extra options enabled by mount */ + unsigned int clone_blksize; /* granularity of a CLONE operation */ #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac678b7a65ed..92ff445e60a0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -141,6 +141,7 @@ struct nfs_fsinfo { __u32 lease_time; /* in seconds */ __u32 layouttype; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ + __u32 clone_blksize; /* granularity of a CLONE operation */ }; struct nfs_fsstat { -- cgit v1.2.3 From 203d027de4d7068c607b60d4310a1599dec8839f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 11:56:26 +0200 Subject: vga_switcheroo: Use enum vga_switcheroo_state instead of int Signed-off-by: Lukas Wunner Reviewed-by: Jani Nikula Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 376499197717..e63661757505 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -138,7 +138,7 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); -int vga_switcheroo_get_client_state(struct pci_dev *dev); +enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev); void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic); @@ -157,7 +157,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } -static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } +static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {} -- cgit v1.2.3 From 21c5ba8c1ee02f204e556c26703cebaf9c4019e0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 13:30:32 +0200 Subject: vga_switcheroo: Use VGA_SWITCHEROO_UNKNOWN_ID instead of -1 Signed-off-by: Lukas Wunner Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index e63661757505..88909a865b72 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -59,6 +59,9 @@ enum vga_switcheroo_state { /** * enum vga_switcheroo_client_id - client identifier + * @VGA_SWITCHEROO_UNKNOWN_ID: initial identifier assigned to vga clients. + * Determining the id requires the handler, so GPUs are given their + * true id in a delayed fashion in vga_switcheroo_enable() * @VGA_SWITCHEROO_IGD: integrated graphics device * @VGA_SWITCHEROO_DIS: discrete graphics device * @VGA_SWITCHEROO_MAX_CLIENTS: currently no more than two GPUs are supported @@ -66,6 +69,7 @@ enum vga_switcheroo_state { * Client identifier. Audio clients use the same identifier & 0x100. */ enum vga_switcheroo_client_id { + VGA_SWITCHEROO_UNKNOWN_ID = -1, VGA_SWITCHEROO_IGD, VGA_SWITCHEROO_DIS, VGA_SWITCHEROO_MAX_CLIENTS, -- cgit v1.2.3 From fa3e967fffaf267ccab7959429722da34e45ad77 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 12:54:07 +0200 Subject: vga_switcheroo: Use enum vga_switcheroo_client_id instead of int Signed-off-by: Lukas Wunner Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 88909a865b72..c55751155631 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -100,7 +100,7 @@ struct vga_switcheroo_handler { int (*switchto)(enum vga_switcheroo_client_id id); int (*power_state)(enum vga_switcheroo_client_id id, enum vga_switcheroo_state state); - int (*get_client_id)(struct pci_dev *pdev); + enum vga_switcheroo_client_id (*get_client_id)(struct pci_dev *pdev); }; /** @@ -132,7 +132,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev, bool driver_power_control); int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id); + enum vga_switcheroo_client_id id); void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); @@ -158,7 +158,7 @@ static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_i static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id) { return 0; } + enum vga_switcheroo_client_id id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } -- cgit v1.2.3 From b299167652fe58f1ebadb3e3ac84a5a0b74e534e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 7 Oct 2015 02:45:11 +0300 Subject: i2c: ocores: support big-endian register layout This allows using OpenCores I2C controller attached to its host in native-endian mode with bi-endian CPUs. Example of such system is Xtensa XTFPGA platform. Acked-by: Peter Korsgaard Signed-off-by: Max Filippov Signed-off-by: Wolfram Sang --- include/linux/i2c-ocores.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c-ocores.h b/include/linux/i2c-ocores.h index 1c06b5c7c308..01edd96fe1f7 100644 --- a/include/linux/i2c-ocores.h +++ b/include/linux/i2c-ocores.h @@ -15,6 +15,7 @@ struct ocores_i2c_platform_data { u32 reg_shift; /* register offset shift value */ u32 reg_io_width; /* register io read/write width */ u32 clock_khz; /* input clock in kHz */ + bool big_endian; /* registers are big endian */ u8 num_devices; /* number of devices in the devices list */ struct i2c_board_info const *devices; /* devices connected to the bus */ }; -- cgit v1.2.3 From c6f1891323e6a259c0b0f516a3a3e0f6b0ee2c5f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 7 Oct 2015 10:16:31 +0200 Subject: i2c: rcar: Remove obsolete platform data support Since commit 4baadb9e05c68962 ("ARM: shmobile: r8a7778: remove obsolete setup code"), Renesas R-Car SoCs are only supported in generic DT-only ARM multi-platform builds. The driver doesn't need to use platform data anymore, hence remove platform data configuration. Signed-off-by: Geert Uytterhoeven [wsa: removed now unused ret value and cast to proper enum type] Signed-off-by: Wolfram Sang --- include/linux/i2c/i2c-rcar.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/linux/i2c/i2c-rcar.h (limited to 'include/linux') diff --git a/include/linux/i2c/i2c-rcar.h b/include/linux/i2c/i2c-rcar.h deleted file mode 100644 index 496f5c2b23c9..000000000000 --- a/include/linux/i2c/i2c-rcar.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __I2C_R_CAR_H__ -#define __I2C_R_CAR_H__ - -#include - -struct i2c_rcar_platform_data { - u32 bus_speed; -}; - -#endif /* __I2C_R_CAR_H__ */ -- cgit v1.2.3 From 5d170139eb10ae12e1bd076245c42b35453d8324 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 18 Oct 2015 13:05:40 +0200 Subject: vga_switcheroo: Constify vga_switcheroo_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vga_switcheroo_client_ops has always been declared const since its introduction with 26ec685ff9d9 ("vga_switcheroo: Introduce struct vga_switcheroo_client_ops"). Do so for vga_switcheroo_handler as well. drivers/gpu/drm/amd/amdgpu/amdgpu.ko: 6 .rodata 00009888 - 19 .data 00001f00 + 19 .data 00001ee0 drivers/gpu/drm/nouveau/nouveau.ko: 6 .rodata 000460b8 17 .data 00018fe0 drivers/gpu/drm/radeon/radeon.ko: - 7 .rodata 00030944 + 7 .rodata 00030964 - 21 .data 0000d6a0 + 21 .data 0000d678 drivers/platform/x86/apple-gmux.ko: - 7 .rodata 00000140 + 7 .rodata 00000160 - 11 .data 000000e0 + 11 .data 000000b8 Cc: Ben Skeggs Cc: Darren Hart Cc: Alex Deucher Signed-off-by: Lukas Wunner Reviewed-by: Christian König . Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index c55751155631..786bc931dbd1 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -137,7 +137,7 @@ int vga_switcheroo_register_audio_client(struct pci_dev *pdev, void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); -int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler); +int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler); void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); @@ -155,7 +155,7 @@ static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} static inline int vga_switcheroo_register_client(struct pci_dev *dev, const struct vga_switcheroo_client_ops *ops, bool driver_power_control) { return 0; } static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {} -static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } +static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, enum vga_switcheroo_client_id id) { return 0; } -- cgit v1.2.3 From 9a8928359736ab170303ee8a2cc15db54e3a4a8f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 15 Oct 2015 14:44:38 +0300 Subject: net/mlx4_core: Add support for filtering multicast loopback Update device capabilities regarding HW filtering multicast loopback support. Add MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB attribute to mlx4_update_qp to enable changing QP context to support filtering incoming multicast loopback traffic according the sender's counter index. Set the corresponding bits in QP context to force the loopback source checks if attribute is given and HW supports it. Signed-off-by: Maor Gottlieb Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 2 ++ include/linux/mlx4/qp.h | 24 +++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index baad4cb8e9b0..dac6872dbaea 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -214,6 +214,8 @@ enum { MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29, MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, + MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, }; enum { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index de45a51b3f04..fe052e234906 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -135,7 +135,10 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 vlan_control; + union { + u8 vlan_control; + u8 control; + }; u8 disable_pkey_check; u8 pkey_index; u8 counter_index; @@ -156,9 +159,16 @@ struct mlx4_qp_path { }; enum { /* fl */ - MLX4_FL_CV = 1 << 6, - MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2 + MLX4_FL_CV = 1 << 6, + MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, + MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, + MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, }; + +enum { /* control */ + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7, +}; + enum { /* vlan_control */ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */ @@ -254,6 +264,8 @@ enum { MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32, MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32, MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, }; enum { /* param3 */ @@ -436,11 +448,13 @@ enum mlx4_update_qp_attr { MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 + MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB = 1 << 4, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 5) - 1 }; enum mlx4_update_qp_params_flags { - MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 1, }; struct mlx4_update_qp_params { -- cgit v1.2.3 From 62a615e083604d291af0cb18f9b4549531ea4f94 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Oct 2015 12:16:41 +0300 Subject: mfd: core: redo ACPI matching of the children devices There is at least one board on the market, i.e. Intel Galileo Gen2, that uses _ADR to distinguish the devices under one actual device. Due to this we have to improve the quirk in the MFD core to handle that board. Acked-by: Rafael J. Wysocki Acked-by: Lee Jones Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/mfd/core.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index a76bc100bf97..27dac3ff18b9 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -18,6 +18,12 @@ struct irq_domain; +/* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ +struct mfd_cell_acpi_match { + const char *pnpid; + const unsigned long long adr; +}; + /* * This struct describes the MFD part ("cell"). * After registration the copy of this structure will become the platform data @@ -44,8 +50,8 @@ struct mfd_cell { */ const char *of_compatible; - /* Matches ACPI PNP id, either _HID or _CID */ - const char *acpi_pnpid; + /* Matches ACPI */ + const struct mfd_cell_acpi_match *acpi_match; /* * These resources can be specified relative to the parent device. -- cgit v1.2.3 From 0d0f4aab4e4d290138a4ae7f2ef8469e48c9a669 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 7 Oct 2015 14:39:55 +0300 Subject: lockd: get rid of reference-counted NSM RPC clients Currently we have reference-counted per-net NSM RPC client which created on the first monitor request and destroyed after the last unmonitor request. It's needed because RPC client need to know 'utsname()->nodename', but utsname() might be NULL when nsm_unmonitor() called. So instead of holding the rpc client we could just save nodename in struct nlm_host and pass it to the rpc_create(). Thus ther is no need in keeping rpc client until last unmonitor request. We could create separate RPC clients for each monitor/unmonitor requests. Signed-off-by: Andrey Ryabinin Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index fd3b65bf51b5..c15373894a42 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -68,6 +68,7 @@ struct nlm_host { struct nsm_handle *h_nsmhandle; /* NSM status handle */ char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ + char nodename[UNX_MAXNODENAME + 1]; }; /* -- cgit v1.2.3 From 778620364ef525e83597a6edee4d0a69db67fd3d Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Fri, 16 Oct 2015 08:59:08 +1100 Subject: sunrpc/cache: make cache flushing more reliable. The caches used to store sunrpc authentication information can be flushed by writing a timestamp to a file in /proc. This timestamp has a one-second resolution and any entry in cache that was last_refreshed *before* that time is treated as expired. This is problematic as it is not possible to reliably flush the cache without interrupting NFS service. If the current time is written to the "flush" file, any entry that was added since the current second started will still be treated as valid. If one second beyond than the current time is written to the file then no entries can be valid until the second ticks over. This will mean that no NFS request will be handled for up to 1 second. To resolve this issue we make two changes: 1/ treat an entry as expired if the timestamp when it was last_refreshed is before *or the same as* the expiry time. This means that current code which writes out the current time will now flush the cache reliably. 2/ when a new entry in added to the cache - set the last_refresh timestamp to 1 second *beyond* the current flush time, when that not in the past. This ensures that newly added entries will always be valid. Now that we have a very reliable way to flush the cache, and also since we are using "since-boot" timestamps which are monotonic, change cache_purge() to set the smallest future flush_time which will work, and leave it there: don't revert to '1'. Also disable the setting of the 'flush_time' far into the future. That has never been useful and is now awkward as it would cause last_refresh times to be strange. Finally: if a request is made to set the 'flush_time' to the current second, assume the intent is to flush the cache and advance it, if necessary, to 1 second beyond the current 'flush_time' so that all active entries will be deemed to be expired. As part of this we need to add a 'cache_detail' arg to cache_init() and cache_fresh_locked() so they can find the current ->flush_time. Signed-off-by: NeilBrown Reported-by: Olaf Kirch Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 03d3b4c92d9f..ed03c9f7f908 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -48,8 +48,10 @@ struct cache_head { struct hlist_node cache_list; time_t expiry_time; /* After time time, don't use the data */ - time_t last_refresh; /* If CACHE_PENDING, this is when upcall - * was sent, else this is when update was received + time_t last_refresh; /* If CACHE_PENDING, this is when upcall was + * sent, else this is when update was + * received, though it is alway set to + * be *after* ->flush_time. */ struct kref ref; unsigned long flags; @@ -105,8 +107,12 @@ struct cache_detail { /* fields below this comment are for internal use * and should not be touched by cache owners */ - time_t flush_time; /* flush all cache items with last_refresh - * earlier than this */ + time_t flush_time; /* flush all cache items with + * last_refresh at or earlier + * than this. last_refresh + * is never set at or earlier + * than this. + */ struct list_head others; time_t nextcheck; int entries; @@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); + (detail->flush_time >= h->last_refresh); } extern int cache_check(struct cache_detail *detail, -- cgit v1.2.3 From c0e5c4450494d74c8deb4f47ddcbb74c94937e20 Mon Sep 17 00:00:00 2001 From: Dustin Byford Date: Fri, 23 Oct 2015 12:27:06 -0700 Subject: acpi: add acpi_preset_companion() stub Add a stub for acpi_preset_companion(). Fixes build failures when acpi_preset_companion() is used and CONFIG_ACPI is not set. Acked-by: Mika Westerberg Signed-off-by: Dustin Byford Acked-by: Rafael J. Wysocki Signed-off-by: Wolfram Sang --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 43856d19cf4d..43b55e751dea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -477,6 +477,11 @@ static inline bool has_acpi_companion(struct device *dev) return false; } +static inline void acpi_preset_companion(struct device *dev, + struct acpi_device *parent, u64 addr) +{ +} + static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; -- cgit v1.2.3 From d787dcdb9c8f412b1dd0727f90d3f793a61a2551 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 23 Oct 2015 20:41:31 +0200 Subject: bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus Reduced Serial Bus (RSB) is an Allwinner proprietery interface used to communicate with PMICs and other peripheral ICs. RSB is a two-wire push-pull serial bus that supports 1 master device and up to 15 active slave devices. Signed-off-by: Chen-Yu Tsai Reviewed-by: Mark Brown Acked-by: Arnd Bergmann Signed-off-by: Maxime Ripard Signed-off-by: Olof Johansson --- include/linux/sunxi-rsb.h | 105 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 include/linux/sunxi-rsb.h (limited to 'include/linux') diff --git a/include/linux/sunxi-rsb.h b/include/linux/sunxi-rsb.h new file mode 100644 index 000000000000..7e75bb0346d0 --- /dev/null +++ b/include/linux/sunxi-rsb.h @@ -0,0 +1,105 @@ +/* + * Allwinner Reduced Serial Bus Driver + * + * Copyright (c) 2015 Chen-Yu Tsai + * + * Author: Chen-Yu Tsai + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#ifndef _SUNXI_RSB_H +#define _SUNXI_RSB_H + +#include +#include +#include + +struct sunxi_rsb; + +/** + * struct sunxi_rsb_device - Basic representation of an RSB device + * @dev: Driver model representation of the device. + * @ctrl: RSB controller managing the bus hosting this device. + * @rtaddr: This device's runtime address + * @hwaddr: This device's hardware address + */ +struct sunxi_rsb_device { + struct device dev; + struct sunxi_rsb *rsb; + int irq; + u8 rtaddr; + u16 hwaddr; +}; + +static inline struct sunxi_rsb_device *to_sunxi_rsb_device(struct device *d) +{ + return container_of(d, struct sunxi_rsb_device, dev); +} + +static inline void *sunxi_rsb_device_get_drvdata(const struct sunxi_rsb_device *rdev) +{ + return dev_get_drvdata(&rdev->dev); +} + +static inline void sunxi_rsb_device_set_drvdata(struct sunxi_rsb_device *rdev, + void *data) +{ + dev_set_drvdata(&rdev->dev, data); +} + +/** + * struct sunxi_rsb_driver - RSB slave device driver + * @driver: RSB device drivers should initialize name and owner field of + * this structure. + * @probe: binds this driver to a RSB device. + * @remove: unbinds this driver from the RSB device. + */ +struct sunxi_rsb_driver { + struct device_driver driver; + int (*probe)(struct sunxi_rsb_device *rdev); + int (*remove)(struct sunxi_rsb_device *rdev); +}; + +static inline struct sunxi_rsb_driver *to_sunxi_rsb_driver(struct device_driver *d) +{ + return container_of(d, struct sunxi_rsb_driver, driver); +} + +int sunxi_rsb_driver_register(struct sunxi_rsb_driver *rdrv); + +/** + * sunxi_rsb_driver_unregister() - unregister an RSB client driver + * @rdrv: the driver to unregister + */ +static inline void sunxi_rsb_driver_unregister(struct sunxi_rsb_driver *rdrv) +{ + if (rdrv) + driver_unregister(&rdrv->driver); +} + +#define module_sunxi_rsb_driver(__sunxi_rsb_driver) \ + module_driver(__sunxi_rsb_driver, sunxi_rsb_driver_register, \ + sunxi_rsb_driver_unregister) + +struct regmap *__devm_regmap_init_sunxi_rsb(struct sunxi_rsb_device *rdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); + +/** + * devm_regmap_init_sunxi_rsb(): Initialise managed register map + * + * @rdev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sunxi_rsb(rdev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sunxi_rsb, #config, \ + rdev, config) + +#endif /* _SUNXI_RSB_H */ -- cgit v1.2.3 From 1be5336bc7ba050ee07d352643bf4c01c513553c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 16 Oct 2015 10:18:10 +0300 Subject: dmaengine: edma: New device tree binding With the old binding and driver architecture we had many issues: No way to assign eDMA channels to event queues, thus not able to tune the system by moving specific DMA channels to low/high priority servicing. We moved the cyclic channels to high priority within the code, but that was just a workaround to this issue. Memcopy was fundamentally broken: even if the driver scanned the DT/devices in the booted system for direct DMA users (which is not effective when the events are going through a crossbar) and created a map of 'used' channels, this information was not really usable. Since via dmaengien API the eDMA driver will be called with _some_ channel number, we would try to request this channel when any channel is requested for memcpy. By luck we got channel which is not used by any device most of the time so things worked, but if a device would have been using the given channel, but not requested it, the memcpy channel would have been waiting for HW event. The old code had the am33xx/am43xx DMA event router handling embedded. This should have been done in a separate driver since it is not part of the actual eDMA IP. There were no way to 'lock' PaRAM slots to be used by the DSP for example when booting with DT. In DT boot the edma node used more than one hwmod which is not a good practice and the kernel prints warning because of this. With the new bindings and the changes in the driver we can: - No regression with Legacy binding and non DT boot - DMA channels can be assigned to any TC (to set priority) - PaRAM slots can be reserved for other cores to use - Dynamic power management for CC and TCs, if only TC0 is used all other TC can be powered down for example Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index 6b9d500956e4..e2878baeb90e 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -71,6 +71,9 @@ struct edma_soc_info { /* Resource reservation for other cores */ struct edma_rsv_info *rsv; + /* List of channels allocated for memcpy, terminated with -1 */ + s16 *memcpy_channels; + s8 (*queue_priority_mapping)[2]; const s16 (*xbar_chans)[2]; }; -- cgit v1.2.3 From 412a15c0fe537c59c794d4e8134580b9cb984a0c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:36 +0300 Subject: svcrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..1e4438ea2380 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge { }; struct svc_rdma_fastreg_mr { struct ib_mr *mr; - void *kva; - struct ib_fast_reg_page_list *page_list; - int page_list_len; + struct scatterlist *sg; + int sg_nents; unsigned long access_flags; - unsigned long map_len; enum dma_data_direction direction; struct list_head frmr_list; }; -- cgit v1.2.3 From a519435a96597d8cd96123246fea4ae5a6c90b02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 20 Oct 2015 16:34:16 +0200 Subject: dma-buf/fence: add fence_wait_any_timeout function v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Waiting for the first fence in an array of fences to signal. This is useful for device driver specific resource managers and also Vulkan needs something similar. v2: more parameter checks, handling for timeout==0, remove NULL entry support, better callback removal. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Maarten Lankhorst --- include/linux/fence.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index 39efee130d2b..a4084d6bb851 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -305,7 +305,8 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) } signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); - +signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, + bool intr, signed long timeout); /** * fence_wait - sleep until the fence gets signaled -- cgit v1.2.3 From 6c455ac17bcf4beae6c094a1007b976b60b4bb57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 21 Oct 2015 12:58:17 +0200 Subject: dma-buf/fence: add fence_is_later() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return true when fence 1 is later than fence 2 without checking if any of them are signaled. Useful for driver specific resource handling based on fences. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- include/linux/fence.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index a4084d6bb851..bb522011383b 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -279,6 +279,22 @@ fence_is_signaled(struct fence *fence) return false; } +/** + * fence_is_later - return if f1 is chronologically later than f2 + * @f1: [in] the first fence from the same context + * @f2: [in] the second fence from the same context + * + * Returns true if f1 is chronologically later than f2. Both fences must be + * from the same context, since a seqno is not re-used across contexts. + */ +static inline bool fence_is_later(struct fence *f1, struct fence *f2) +{ + if (WARN_ON(f1->context != f2->context)) + return false; + + return f1->seqno - f2->seqno < INT_MAX; +} + /** * fence_later - return the chronologically later fence * @f1: [in] the first fence from the same context @@ -298,10 +314,10 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) * set if enable_signaling wasn't called, and enabling that here is * overkill. */ - if (f2->seqno - f1->seqno <= INT_MAX) - return fence_is_signaled(f2) ? NULL : f2; - else + if (fence_is_later(f1, f2)) return fence_is_signaled(f1) ? NULL : f1; + else + return fence_is_signaled(f2) ? NULL : f2; } signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); -- cgit v1.2.3 From a76caf55e5b356ba20a5a43ac4d9f7a04b20941d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98rjan=20Eide?= Date: Thu, 10 Sep 2015 18:09:30 +0100 Subject: thermal: Add devfreq cooling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a generic thermal cooling device for devfreq, that is similar to cpu_cooling. The device must use devfreq. In order to use the power extension of the cooling device, it must have registered its OPPs using the OPP library. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Ørjan Eide Signed-off-by: Eduardo Valentin --- include/linux/devfreq_cooling.h | 81 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 include/linux/devfreq_cooling.h (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h new file mode 100644 index 000000000000..ee5f0ec9290b --- /dev/null +++ b/include/linux/devfreq_cooling.h @@ -0,0 +1,81 @@ +/* + * devfreq_cooling: Thermal cooling device implementation for devices using + * devfreq + * + * Copyright (C) 2014-2015 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __DEVFREQ_COOLING_H__ +#define __DEVFREQ_COOLING_H__ + +#include +#include + +#ifdef CONFIG_DEVFREQ_THERMAL + +/** + * struct devfreq_cooling_power - Devfreq cooling power ops + * @get_static_power: Take voltage, in mV, and return the static power + * in mW. If NULL, the static power is assumed + * to be 0. + * @get_dynamic_power: Take voltage, in mV, and frequency, in HZ, and + * return the dynamic power draw in mW. If NULL, + * a simple power model is used. + * @dyn_power_coeff: Coefficient for the simple dynamic power model in + * mW/(MHz mV mV). + * If get_dynamic_power() is NULL, then the + * dynamic power is calculated as + * @dyn_power_coeff * frequency * voltage^2 + */ +struct devfreq_cooling_power { + unsigned long (*get_static_power)(unsigned long voltage); + unsigned long (*get_dynamic_power)(unsigned long freq, + unsigned long voltage); + unsigned long dyn_power_coeff; +}; + +struct devfreq_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power); +struct devfreq_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); +struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df); +void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc); + +#else /* !CONFIG_DEVFREQ_THERMAL */ + +struct devfreq_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct devfreq_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct devfreq_cooling_device * +devfreq_cooling_register(struct devfreq *df) +{ + return ERR_PTR(-EINVAL); +} + +static inline void +devfreq_cooling_unregister(struct devfreq_cooling_device *dfc) +{ +} + +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* __DEVFREQ_COOLING_H__ */ -- cgit v1.2.3 From df5c7386f62d2db95ca48005087195e9a15e2b1f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 13 Oct 2015 20:09:19 +0300 Subject: dmaengine: dw: some Intel devices has no memcpy support Provide a flag to choose if the device does support memory-to-memory transfers. At least this is not true for iDMA32 controller that might be supported in the future. Besides that Intel BayTrail and Braswell users should not try this feature due to HW specific behaviour. Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-dw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 87ac14c584f2..03b6095d3b18 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -37,6 +37,7 @@ struct dw_dma_slave { * @nr_channels: Number of channels supported by hardware (max 8) * @is_private: The device channels should be marked as private and not for * by the general purpose DMA channel allocator. + * @is_memcpy: The device channels do support memory-to-memory transfers. * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. * @block_size: Maximum block size supported by the controller @@ -47,6 +48,7 @@ struct dw_dma_slave { struct dw_dma_platform_data { unsigned int nr_channels; bool is_private; + bool is_memcpy; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ unsigned char chan_allocation_order; -- cgit v1.2.3 From 42e5c3e2725ba0c0affc1fc8a6aa1d5cf31ecb75 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:27:35 -0400 Subject: SUNRPC: Abstract backchannel operations xprt_{setup,destroy}_backchannel() won't be adequate for RPC/RMDA bi-direction. In particular, receive buffers have to be pre- registered and posted in order to receive incoming backchannel requests. Add a virtual function call to allow the insertion of appropriate backchannel setup and destruction methods for each transport. In addition, freeing a backchannel request is a little different for RPC/RDMA. Introduce an rpc_xprt_op to handle the difference. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/bc_xprt.h | 5 +++++ include/linux/sunrpc/xprt.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 8df43c9f11dc..4397a4824c81 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -38,6 +38,11 @@ void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +/* Socket backchannel transport methods */ +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); +void xprt_free_bc_rqst(struct rpc_rqst *req); + /* * Determine if a shared backchannel is in use */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0fb9acbb4780..3f79c4a4ce74 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -136,6 +136,11 @@ struct rpc_xprt_ops { int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); void (*inject_disconnect)(struct rpc_xprt *xprt); + int (*bc_setup)(struct rpc_xprt *xprt, + unsigned int min_reqs); + void (*bc_free_rqst)(struct rpc_rqst *rqst); + void (*bc_destroy)(struct rpc_xprt *xprt, + unsigned int max_reqs); }; /* -- cgit v1.2.3 From 9468431962616c2449d47c482208a5967e011bf9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:16 -0400 Subject: svcrdma: Add backward direction service for RPC/RDMA transport On NFSv4.1 mount points, the Linux NFS client uses this transport endpoint to receive backward direction calls and route replies back to the NFSv4.1 server. Signed-off-by: Chuck Lever Acked-by: "J. Bruce Fields" Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc_rdma.h | 6 +++++- include/linux/sunrpc/xprt.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..fb4013edcf57 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -228,9 +228,13 @@ extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); -extern struct svc_xprt_class svc_rdma_class; extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); +extern struct svc_xprt_class svc_rdma_class; +#ifdef CONFIG_SUNRPC_BACKCHANNEL +extern struct svc_xprt_class svc_rdma_bc_class; +#endif + /* svc_rdma.c */ extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3f79c4a4ce74..82c083946ef0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -158,6 +158,7 @@ enum xprt_transports { XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, }; -- cgit v1.2.3 From 3c99c2cef75eb5bfc05c5728e4560f3ee656d47e Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 2 Nov 2015 19:03:03 +0000 Subject: thermal: devfreq_cooling: use a thermal_cooling_device for register and unregister Be consistent with what other cooling devices do and return a struct thermal_cooling_device * on register. Also, for the unregister, accept a struct thermal_cooling_device * as parameter. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/devfreq_cooling.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index ee5f0ec9290b..7adf6cc4b305 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -43,37 +43,37 @@ struct devfreq_cooling_power { unsigned long dyn_power_coeff; }; -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power); -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); -struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df); -void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc); +struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df); +void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); #else /* !CONFIG_DEVFREQ_THERMAL */ -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { return ERR_PTR(-EINVAL); } -static inline struct devfreq_cooling_device * +static inline struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) { return ERR_PTR(-EINVAL); } -static inline struct devfreq_cooling_device * +static inline struct thermal_cooling_device * devfreq_cooling_register(struct devfreq *df) { return ERR_PTR(-EINVAL); } static inline void -devfreq_cooling_unregister(struct devfreq_cooling_device *dfc) +devfreq_cooling_unregister(struct thermal_cooling_device *dfc) { } -- cgit v1.2.3 From 76566773a1f1c2295ed901b6f1241cfe10d99029 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:32 -0400 Subject: NFS: Enable client side NFSv4.1 backchannel to use other transports Forechannel transports get their own "bc_up" method to create an endpoint for the backchannel service. Signed-off-by: Chuck Lever [Anna Schumaker: Add forward declaration of struct net to xprt.h] Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 82c083946ef0..69ef5b3ab038 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -54,6 +54,8 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; struct seq_file; +struct svc_serv; +struct net; /* * This describes a complete RPC request @@ -138,6 +140,7 @@ struct rpc_xprt_ops { void (*inject_disconnect)(struct rpc_xprt *xprt); int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); + int (*bc_up)(struct svc_serv *serv, struct net *net); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); -- cgit v1.2.3 From 79dbd1baa651cece408e68a1b445f3628c4b5bdc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2015 22:23:56 +0100 Subject: libceph: msg signing callouts don't need con argument We can use msg->con instead - at the point we sign an outgoing message or check the signature on the incoming one, msg->con is always set. We wouldn't know how to sign a message without an associated session (i.e. msg->con == NULL) and being able to sign a message using an explicitly provided authorizer is of no use. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b2371d9b51fa..3687ff0f0133 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -43,10 +43,9 @@ struct ceph_connection_operations { struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); - int (*sign_message) (struct ceph_connection *con, struct ceph_msg *msg); - int (*check_message_signature) (struct ceph_connection *con, - struct ceph_msg *msg); + int (*sign_message) (struct ceph_msg *msg); + int (*check_message_signature) (struct ceph_msg *msg); }; /* use format string %s%d */ -- cgit v1.2.3 From 859bff51dc5e92ddfb5eb6f17b8040d9311095bb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 28 Oct 2015 23:50:58 +0100 Subject: libceph: stop duplicating client fields in messenger supported_features and required_features serve no purpose at all, while nocrc and tcp_nodelay belong to ceph_options::flags. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + include/linux/ceph/messenger.h | 11 +---------- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 397c5cd09794..a7caafe03d3c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -137,6 +137,7 @@ struct ceph_client { #endif }; +#define from_msgr(ms) container_of(ms, struct ceph_client, msgr) /* diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 3687ff0f0133..71b1d6cdcb5d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -57,8 +57,6 @@ struct ceph_messenger { atomic_t stopping; possible_net_t net; - bool nocrc; - bool tcp_nodelay; /* * the global_seq counts connections i (attempt to) initiate @@ -66,9 +64,6 @@ struct ceph_messenger { */ u32 global_seq; spinlock_t global_seq_lock; - - u64 supported_features; - u64 required_features; }; enum ceph_msg_data_type { @@ -267,11 +262,7 @@ extern void ceph_msgr_exit(void); extern void ceph_msgr_flush(void); extern void ceph_messenger_init(struct ceph_messenger *msgr, - struct ceph_entity_addr *myaddr, - u64 supported_features, - u64 required_features, - bool nocrc, - bool tcp_nodelay); + struct ceph_entity_addr *myaddr); extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, -- cgit v1.2.3 From a51983e4dd2d4d63912aab939f657c4cd476e21a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 28 Oct 2015 23:52:06 +0100 Subject: libceph: add nocephx_sign_messages option Support for message signing was merged into 3.19, along with nocephx_require_signatures option. But, all that option does is allow the kernel client to talk to clusters that don't support MSG_AUTH feature bit. That's pretty useless, given that it's been supported since bobtail. Meanwhile, if one disables message signing on the server side with "cephx sign messages = false", it becomes impossible to use the kernel client since it expects messages to be signed if MSG_AUTH was negotiated. Add nocephx_sign_messages option to support this use case. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index a7caafe03d3c..3e3799cdc6e6 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -29,8 +29,9 @@ #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ -#define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */ +#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */ #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ +#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) -- cgit v1.2.3 From 1e935949111e77b2b1b6fa550e88ff0573c2f4c7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 29 Sep 2015 01:27:24 -0700 Subject: watchdog: Always evaluate new timeout against min_timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now, a new timeout value is only evaluated against min_timeout if max_timeout is provided. This does not really make sense; a driver can have a minimum timeout even if it does not have a maximum timeout. Ensure that it is not smaller than min_timeout, even if max_timeout is not set. Signed-off-by: Guenter Roeck Acked-by: Uwe Kleine-König Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index d74a0e907b9e..e90e3ea5ebeb 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -119,8 +119,15 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway /* Use the following function to check if a timeout value is invalid */ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t) { - return ((wdd->max_timeout != 0) && - (t < wdd->min_timeout || t > wdd->max_timeout)); + /* + * The timeout is invalid if + * - the requested value is smaller than the configured minimum timeout, + * or + * - a maximum timeout is configured, and the requested value is larger + * than the maximum timeout. + */ + return t < wdd->min_timeout || + (wdd->max_timeout && t > wdd->max_timeout); } /* Use the following functions to manipulate watchdog driver specific data */ -- cgit v1.2.3 From 8fbcf237439f841e7e9c4675790e08ea1c295bd3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 Nov 2015 18:25:34 +0100 Subject: nfs: Remove unused xdr page offsets in getacl/setacl arguments The arguments passed around for getacl and setacl xdr encoding, struct nfs_setaclargs and struct nfs_getaclargs, both contain an array of pages, an offset into the first page, and the length of the page data. The offset is unused as it is always zero; remove it. Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4728e7e5fc49..570d630f98ae 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -705,7 +705,6 @@ struct nfs_setaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; @@ -717,7 +716,6 @@ struct nfs_getaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; -- cgit v1.2.3 From 80220fa72b917c64675f3ba4008d2c5a7b50b281 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 3 Nov 2015 09:00:15 +0100 Subject: watchdog: include: fix some typos Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index e90e3ea5ebeb..5f18dd9ec224 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -25,7 +25,7 @@ struct watchdog_device; * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. * @set_timeout:The routine for setting the watchdog devices timeout value. - * @get_timeleft:The routine that get's the time that's left before a reset. + * @get_timeleft:The routine that gets the time left before a reset. * @ref: The ref operation for dyn. allocated watchdog_device structs * @unref: The unref operation for dyn. allocated watchdog_device structs * @ioctl: The routines that handles extra ioctl calls. @@ -33,7 +33,7 @@ struct watchdog_device; * The watchdog_ops structure contains a list of low-level operations * that control a watchdog device. It also contains the module that owns * these operations. The start and stop function are mandatory, all other - * functions are optonal. + * functions are optional. */ struct watchdog_ops { struct module *owner; -- cgit v1.2.3 From 760d280084f8805e5de73e3591912d5db9da9dbe Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 3 Nov 2015 09:00:16 +0100 Subject: watchdog: include: add units for timeout values in kerneldoc Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 5f18dd9ec224..027b1f43f12d 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -24,8 +24,8 @@ struct watchdog_device; * @stop: The routine for stopping the watchdog device. * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. - * @set_timeout:The routine for setting the watchdog devices timeout value. - * @get_timeleft:The routine that gets the time left before a reset. + * @set_timeout:The routine for setting the watchdog devices timeout value (in seconds). + * @get_timeleft:The routine that gets the time left before a reset (in seconds). * @ref: The ref operation for dyn. allocated watchdog_device structs * @unref: The unref operation for dyn. allocated watchdog_device structs * @ioctl: The routines that handles extra ioctl calls. @@ -59,9 +59,9 @@ struct watchdog_ops { * @info: Pointer to a watchdog_info structure. * @ops: Pointer to the list of watchdog operations. * @bootstatus: Status of the watchdog device at boot. - * @timeout: The watchdog devices timeout value. - * @min_timeout:The watchdog devices minimum timeout value. - * @max_timeout:The watchdog devices maximum timeout value. + * @timeout: The watchdog devices timeout value (in seconds). + * @min_timeout:The watchdog devices minimum timeout value (in seconds). + * @max_timeout:The watchdog devices maximum timeout value (in seconds). * @driver-data:Pointer to the drivers private data. * @lock: Lock for watchdog core internal use only. * @status: Field that contains the devices internal status bits. -- cgit v1.2.3 From 033291eccbdb1b70ffc02641edae19ac825dc75d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Oct 2015 15:08:48 -0600 Subject: vfio: Include No-IOMMU mode There is really no way to safely give a user full access to a DMA capable device without an IOMMU to protect the host system. There is also no way to provide DMA translation, for use cases such as device assignment to virtual machines. However, there are still those users that want userspace drivers even under those conditions. The UIO driver exists for this use case, but does not provide the degree of device access and programming that VFIO has. In an effort to avoid code duplication, this introduces a No-IOMMU mode for VFIO. This mode requires building VFIO with CONFIG_VFIO_NOIOMMU and enabling the "enable_unsafe_noiommu_mode" option on the vfio driver. This should make it very clear that this mode is not safe. Additionally, CAP_SYS_RAWIO privileges are necessary to work with groups and containers using this mode. Groups making use of this support are named /dev/vfio/noiommu-$GROUP and can only make use of the special VFIO_NOIOMMU_IOMMU for the container. Use of this mode, specifically binding a device without a native IOMMU group to a VFIO bus driver will taint the kernel and should therefore not be considered supported. This patch includes no-iommu support for the vfio-pci bus driver only. Signed-off-by: Alex Williamson Acked-by: Michael S. Tsirkin --- include/linux/vfio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ddb440975382..610a86a892b8 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,6 +44,9 @@ struct vfio_device_ops { void (*request)(void *device_data, unsigned int count); }; +extern struct iommu_group *vfio_iommu_group_get(struct device *dev); +extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); + extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); -- cgit v1.2.3 From e02328f47bd75fde9decf9657ec7d769b370f857 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 8 Sep 2015 14:17:47 +0200 Subject: vga_switcheroo: Drop client power state VGA_SWITCHEROO_INIT hda_intel.c:azx_probe() defers initialization of an audio controller on the discrete GPU if the GPU is powered off. The power state of the GPU is determined by calling vga_switcheroo_get_client_state(). vga_switcheroo_get_client_state() returns VGA_SWITCHEROO_INIT if vga_switcheroo is not enabled, i.e. if no second GPU or no handler has registered. This can go wrong in the following scenario: - Driver for the integrated GPU is not loaded. - Driver for the discrete GPU registers with vga_switcheroo, uses driver power control to power down the GPU, handler cuts power to the GPU. - Driver for the audio controller gets loaded after the GPU was powered down, calls vga_switcheroo_get_client_state() which returns VGA_SWITCHEROO_INIT instead of VGA_SWITCHEROO_OFF. - Consequence: azx_probe() tries to initialize the audio controller even though the GPU is powered down. The power state VGA_SWITCHEROO_INIT was introduced by c8e9cf7bb240 ("vga_switcheroo: Add a helper function to get the client state"). It is not apparent what its benefit might be. The idea seems to be to initialize the audio controller even if the power state is VGA_SWITCHEROO_OFF (were vga_switcheroo enabled), but as shown above this can fail. Drop VGA_SWITCHEROO_INIT to solve this. Acked-by: Takashi Iwai Signed-off-by: Lukas Wunner Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 786bc931dbd1..69e1d4a1f1b3 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -39,10 +39,6 @@ struct pci_dev; * enum vga_switcheroo_state - client power state * @VGA_SWITCHEROO_OFF: off * @VGA_SWITCHEROO_ON: on - * @VGA_SWITCHEROO_INIT: client has registered with vga_switcheroo but - * vga_switcheroo is not enabled, i.e. no second client or no handler - * has registered. Only used in vga_switcheroo_get_client_state() which - * in turn is only called from hda_intel.c * @VGA_SWITCHEROO_NOT_FOUND: client has not registered with vga_switcheroo. * Only used in vga_switcheroo_get_client_state() which in turn is only * called from hda_intel.c @@ -53,7 +49,6 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, /* below are referred only from vga_switcheroo_get_client_state() */ - VGA_SWITCHEROO_INIT, VGA_SWITCHEROO_NOT_FOUND, }; -- cgit v1.2.3 From 8f25348b65cd073f77945f559ab1e5de83422cd1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 4 Nov 2015 14:59:06 +0100 Subject: net: add forgotten IFF_L3MDEV_SLAVE define Fixes: fee6d4c77 ("net: Add netif_is_l3_slave") Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4ac653b7b8ac..2c00772bd136 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1322,6 +1322,7 @@ enum netdev_priv_flags { #define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH +#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE /** * struct net_device - The DEVICE structure. -- cgit v1.2.3 From 805c4bc05705fb2b71ec970960b456eee9900953 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2015 11:07:13 -0800 Subject: tcp: fix req->saved_syn race For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix ireq->pktopts race"), we need to make sure we do not access req->saved_syn unless we own the request sock. This fixes races for listeners using TCP_SAVE_SYN option. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Ying Cai Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c906f4534581..b386361ba3e8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -397,6 +397,13 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog) queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); } +static inline void tcp_move_syn(struct tcp_sock *tp, + struct request_sock *req) +{ + tp->saved_syn = req->saved_syn; + req->saved_syn = NULL; +} + static inline void tcp_saved_syn_free(struct tcp_sock *tp) { kfree(tp->saved_syn); -- cgit v1.2.3 From 61b590b9ee4221173ad6990a1150c5c9db73564e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 23 Oct 2015 12:43:18 +0200 Subject: netfilter: ingress: don't use nf_hook_list_active nf_hook_list_active() always returns true once at least one device has NF_INGRESS hook enabled. Thus, don't use this function. Instead, inverse the test and use the static key to elide list_empty test if no NF_INGRESS hooks are active. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 187feabe557c..ba7ce8805fe3 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -5,10 +5,13 @@ #include #ifdef CONFIG_NETFILTER_INGRESS -static inline int nf_hook_ingress_active(struct sk_buff *skb) +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) { - return nf_hook_list_active(&skb->dev->nf_hooks_ingress, - NFPROTO_NETDEV, NF_NETDEV_INGRESS); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return !list_empty(&skb->dev->nf_hooks_ingress); } static inline int nf_hook_ingress(struct sk_buff *skb) -- cgit v1.2.3 From b4865988eab598e56e6e628b9b32441acd142b28 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 6 Nov 2015 18:35:57 +0100 Subject: netfilter: ingress: fix wrong input interface on hook The input and output interfaces in nf_hook_state_init() are flipped. This fixes iif matching on nftables. Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index ba7ce8805fe3..5fcd375ef175 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,8 +19,8 @@ static inline int nf_hook_ingress(struct sk_buff *skb) struct nf_hook_state state; nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, - skb->dev, NULL, dev_net(skb->dev), NULL); + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, + skb->dev, NULL, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } -- cgit v1.2.3 From 1b9863c6aa56d92126ec0d5c42eae25df52b7ca1 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:47 -0700 Subject: device property: Introducing enum dev_dma_attr A device could have one of the following DMA attributes: * DMA not supported * DMA non-coherent * DMA coherent So, this patch introduces enum dev_dma_attribute. This will be used by new APIs introduced in later patches. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 463de52fe891..8eecf200bae5 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -27,6 +27,12 @@ enum dev_prop_type { DEV_PROP_MAX, }; +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); -- cgit v1.2.3 From b84f196d963c3159329f72ca1913b08679004a43 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:48 -0700 Subject: ACPI: Adding DMA Attribute APIs for ACPI Device Adding acpi_get_dma_attr() to query DMA attributes of ACPI devices. It returns the enum dev_dma_attr, which communicates DMA information more clearly. This API replaces the acpi_check_dma(), which will be removed in subsequent patch. This patch also provides a convenient function, acpi_dma_supported(), to check DMA support of the specified ACPI device. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 496265b0f527..292af3b69ede 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -579,6 +579,16 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) return false; } +static inline bool acpi_dma_supported(struct acpi_device *adev) +{ + return false; +} + +static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + return DEV_DMA_NOT_SUPPORTED; +} + #define ACPI_PTR(_ptr) (NULL) #endif /* !CONFIG_ACPI */ -- cgit v1.2.3 From e5e558644bbb23cad03c586703331b8bcd9e0e6c Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:49 -0700 Subject: device property: Adding DMA Attribute APIs for Generic Devices The function device_dma_is_coherent() does not sufficiently communicate device DMA attributes. Instead, this patch introduces device_get_dma_attr(), which returns enum dev_dma_attr. It replaces the acpi_check_dma(), which will be removed in subsequent patch. This also provides a convenient function, device_dma_supported(), to check DMA support of the specified device. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 8eecf200bae5..7200490b7e6f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -176,6 +176,10 @@ void device_add_property_set(struct device *dev, struct property_set *pset); bool device_dma_is_coherent(struct device *dev); +bool device_dma_supported(struct device *dev); + +enum dev_dma_attr device_get_dma_attr(struct device *dev); + int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); -- cgit v1.2.3 From ab3d527329f01dd63dc852041006d1a24895d116 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:51 -0700 Subject: device property: ACPI: Remove unused DMA APIs These DMA APIs are replaced with the newer versions, which return the enum dev_dma_attr. So, we can safely remove them. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 ----- include/linux/property.h | 2 -- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 292af3b69ede..b5868300df75 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -574,11 +574,6 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - return false; -} - static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; diff --git a/include/linux/property.h b/include/linux/property.h index 7200490b7e6f..0a3705a7c9f2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -174,8 +174,6 @@ struct property_set { void device_add_property_set(struct device *dev, struct property_set *pset); -bool device_dma_is_coherent(struct device *dev); - bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); -- cgit v1.2.3 From 50230713b63941f4b6b562eea0834f751aa0801e Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:53 -0700 Subject: PCI: OF: Move of_pci_dma_configure() to pci_dma_configure() This patch move of_pci_dma_configure() to a more generic pci_dma_configure(), which can be extended by non-OF code (e.g. ACPI). This has no functional change. Signed-off-by: Suravee Suthikulpanit Acked-by: Rob Herring Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/of_pci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 29fd3fe1c035..ce0e5abeb454 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -16,7 +16,6 @@ int of_pci_get_devfn(struct device_node *np); int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); -void of_pci_dma_configure(struct pci_dev *pci_dev); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -51,8 +50,6 @@ of_get_pci_domain_nr(struct device_node *node) { return -1; } - -static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } #endif #if defined(CONFIG_OF_ADDRESS) -- cgit v1.2.3 From e2b19197ff9dc46f3e3888f273c4395f9e5a9856 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:09 -0800 Subject: mm, page_alloc: remove unnecessary parameter from zone_watermark_ok_safe Overall, the intent of this series is to remove the zonelist cache which was introduced to avoid high overhead in the page allocator. Once this is done, it is necessary to reduce the cost of watermark checks. The series starts with minor micro-optimisations. Next it notes that GFP flags that affect watermark checks are abused. __GFP_WAIT historically identified callers that could not sleep and could access reserves. This was later abused to identify callers that simply prefer to avoid sleeping and have other options. A patch distinguishes between atomic callers, high-priority callers and those that simply wish to avoid sleep. The zonelist cache has been around for a long time but it is of dubious merit with a lot of complexity and some issues that are explained. The most important issue is that a failed THP allocation can cause a zone to be treated as "full". This potentially causes unnecessary stalls, reclaim activity or remote fallbacks. The issues could be fixed but it's not worth it. The series places a small number of other micro-optimisations on top before examining GFP flags watermarks. High-order watermarks enforcement can cause high-order allocations to fail even though pages are free. The watermark checks both protect high-order atomic allocations and make kswapd aware of high-order pages but there is a much better way that can be handled using migrate types. This series uses page grouping by mobility to reserve pageblocks for high-order allocations with the size of the reservation depending on demand. kswapd awareness is maintained by examining the free lists. By patch 12 in this series, there are no high-order watermark checks while preserving the properties that motivated the introduction of the watermark checks. This patch (of 10): No user of zone_watermark_ok_safe() specifies alloc_flags. This patch removes the unnecessary parameter. Signed-off-by: Mel Gorman Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Michal Hocko Reviewed-by: Christoph Lameter Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2d7e660cdefe..e326843c995a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -817,7 +817,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, int alloc_flags); + unsigned long mark, int classzone_idx); enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG, -- cgit v1.2.3 From 46e700abc44ce215acb4341d9702ce3972eda571 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:15 -0800 Subject: mm, page_alloc: remove unnecessary taking of a seqlock when cpusets are disabled There is a seqcounter that protects against spurious allocation failures when a task is changing the allowed nodes in a cpuset. There is no need to check the seqcounter until a cpuset exists. Signed-off-by: Mel Gorman Acked-by: Christoph Lameter Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 5a1311942358..85a868ccb493 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -104,6 +104,9 @@ extern void cpuset_print_current_mems_allowed(void); */ static inline unsigned int read_mems_allowed_begin(void) { + if (!cpusets_enabled()) + return 0; + return read_seqcount_begin(¤t->mems_allowed_seq); } @@ -115,6 +118,9 @@ static inline unsigned int read_mems_allowed_begin(void) */ static inline bool read_mems_allowed_retry(unsigned int seq) { + if (!cpusets_enabled()) + return false; + return read_seqcount_retry(¤t->mems_allowed_seq, seq); } -- cgit v1.2.3 From 016c13daa5c9e4827eca703e2f0621c131f2cca3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:18 -0800 Subject: mm, page_alloc: use masks and shifts when converting GFP flags to migrate types This patch redefines which GFP bits are used for specifying mobility and the order of the migrate types. Once redefined it's possible to convert GFP flags to a migrate type with a simple mask and shift. The only downside is that readers of OOM kill messages and allocation failures may have been used to the existing values but scripts/gfp-translate will help. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 +++++++----- include/linux/mmzone.h | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f92cbd2f4450..440fca3e7e5d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -14,7 +14,7 @@ struct vm_area_struct; #define ___GFP_HIGHMEM 0x02u #define ___GFP_DMA32 0x04u #define ___GFP_MOVABLE 0x08u -#define ___GFP_WAIT 0x10u +#define ___GFP_RECLAIMABLE 0x10u #define ___GFP_HIGH 0x20u #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u @@ -29,7 +29,7 @@ struct vm_area_struct; #define ___GFP_NOMEMALLOC 0x10000u #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u -#define ___GFP_RECLAIMABLE 0x80000u +#define ___GFP_WAIT 0x80000u #define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_NO_KSWAPD 0x400000u @@ -126,6 +126,7 @@ struct vm_area_struct; /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) +#define GFP_MOVABLE_SHIFT 3 /* Control page allocator reclaim behavior */ #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ @@ -152,14 +153,15 @@ struct vm_area_struct; /* Convert GFP flags to their corresponding migrate type */ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) { - WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); + BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; /* Group based on mobility */ - return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | - ((gfp_flags & __GFP_RECLAIMABLE) != 0); + return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } #ifdef CONFIG_HIGHMEM diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e326843c995a..38bed71758ab 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,8 +37,8 @@ enum { MIGRATE_UNMOVABLE, - MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, + MIGRATE_RECLAIMABLE, MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ MIGRATE_RESERVE = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA -- cgit v1.2.3 From d0164adc89f6bb374d304ffcc375c6d2652fe67d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:21 -0800 Subject: mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd __GFP_WAIT has been used to identify atomic context in callers that hold spinlocks or are in interrupts. They are expected to be high priority and have access one of two watermarks lower than "min" which can be referred to as the "atomic reserve". __GFP_HIGH users get access to the first lower watermark and can be called the "high priority reserve". Over time, callers had a requirement to not block when fallback options were available. Some have abused __GFP_WAIT leading to a situation where an optimisitic allocation with a fallback option can access atomic reserves. This patch uses __GFP_ATOMIC to identify callers that are truely atomic, cannot sleep and have no alternative. High priority users continue to use __GFP_HIGH. __GFP_DIRECT_RECLAIM identifies callers that can sleep and are willing to enter direct reclaim. __GFP_KSWAPD_RECLAIM to identify callers that want to wake kswapd for background reclaim. __GFP_WAIT is redefined as a caller that is willing to enter direct reclaim and wake kswapd for background reclaim. This patch then converts a number of sites o __GFP_ATOMIC is used by callers that are high priority and have memory pools for those requests. GFP_ATOMIC uses this flag. o Callers that have a limited mempool to guarantee forward progress clear __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall into this category where kswapd will still be woken but atomic reserves are not used as there is a one-entry mempool to guarantee progress. o Callers that are checking if they are non-blocking should use the helper gfpflags_allow_blocking() where possible. This is because checking for __GFP_WAIT as was done historically now can trigger false positives. Some exceptions like dm-crypt.c exist where the code intent is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to flag manipulations. o Callers that built their own GFP flags instead of starting with GFP_KERNEL and friends now also need to specify __GFP_KSWAPD_RECLAIM. The first key hazard to watch out for is callers that removed __GFP_WAIT and was depending on access to atomic reserves for inconspicuous reasons. In some cases it may be appropriate for them to use __GFP_HIGH. The second key hazard is callers that assembled their own combination of GFP flags instead of starting with something like GFP_KERNEL. They may now wish to specify __GFP_KSWAPD_RECLAIM. It's almost certainly harmless if it's missed in most cases as other activity will wake kswapd. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 46 +++++++++++++++++++++++++++++++++------------- include/linux/skbuff.h | 6 +++--- 2 files changed, 36 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 440fca3e7e5d..b56e811b6f7c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -29,12 +29,13 @@ struct vm_area_struct; #define ___GFP_NOMEMALLOC 0x10000u #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u -#define ___GFP_WAIT 0x80000u +#define ___GFP_ATOMIC 0x80000u #define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u -#define ___GFP_NO_KSWAPD 0x400000u +#define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_OTHER_NODE 0x800000u #define ___GFP_WRITE 0x1000000u +#define ___GFP_KSWAPD_RECLAIM 0x2000000u /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* @@ -71,7 +72,7 @@ struct vm_area_struct; * __GFP_MOVABLE: Flag that this page will be movable by the page migration * mechanism or reclaimed */ -#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */ +#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) /* Caller cannot wait or reschedule */ #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ #define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ #define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ @@ -94,23 +95,37 @@ struct vm_area_struct; #define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ -#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ +/* + * A caller that is willing to wait may enter direct reclaim and will + * wake kswapd to reclaim pages in the background until the high + * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to + * avoid unnecessary delays when a fallback option is available but + * still allow kswapd to reclaim in the background. The kswapd flag + * can be cleared when the reclaiming of pages would cause unnecessary + * disruption. + */ +#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ + /* * This may seem redundant, but it's a way of annotating false positives vs. * allocations that simply cannot be supported (e.g. page tables). */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) -/* This equals 0, but use constants in case they ever change */ -#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) -/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */ -#define GFP_ATOMIC (__GFP_HIGH) +/* + * GFP_ATOMIC callers can not sleep, need the allocation to succeed. + * A lower watermark is applied to allow access to "atomic reserves" + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) @@ -119,10 +134,10 @@ struct vm_area_struct; #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) -#define GFP_IOFS (__GFP_IO | __GFP_FS) -#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ - __GFP_NO_KSWAPD) +#define GFP_IOFS (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM) +#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ + ~__GFP_KSWAPD_RECLAIM) /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -164,6 +179,11 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } +static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) +{ + return gfp_flags & __GFP_DIRECT_RECLAIM; +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 24f4dfd94c51..4355129fff91 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb) static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); @@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb) */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, pri); @@ -1344,7 +1344,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) static inline struct sk_buff *skb_unshare(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); -- cgit v1.2.3 From 40113370836e8e79befa585277296ed42781ef31 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:25 -0800 Subject: mm: page_alloc: remove GFP_IOFS GFP_IOFS was intended to be shorthand for clearing two flags, not a set of allocation flags. There is only one user of this flag combination now and there appears to be no reason why Lustre had to be protected from reclaim stalls. As none of the sites appear to be atomic, this patch simply deletes GFP_IOFS and converts Lustre to using GFP_KERNEL, GFP_NOFS or GFP_NOIO as appropriate. Signed-off-by: Mel Gorman Cc: Oleg Drokin Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b56e811b6f7c..86f9f7da86ea 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -134,7 +134,6 @@ struct vm_area_struct; #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) -#define GFP_IOFS (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ ~__GFP_KSWAPD_RECLAIM) -- cgit v1.2.3 From 71baba4b92dc1fa1bc461742c6ab1942ec6034e9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:28 -0800 Subject: mm, page_alloc: rename __GFP_WAIT to __GFP_RECLAIM __GFP_WAIT was used to signal that the caller was in atomic context and could not sleep. Now it is possible to distinguish between true atomic context and callers that are not willing to sleep. The latter should clear __GFP_DIRECT_RECLAIM so kswapd will still wake. As clearing __GFP_WAIT behaves differently, there is a risk that people will clear the wrong flags. This patch renames __GFP_WAIT to __GFP_RECLAIM to clearly indicate what it does -- setting it allows all reclaim activity, clearing them prevents it. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Cc: Christoph Lameter Acked-by: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 86f9f7da86ea..369227202ac2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -107,7 +107,7 @@ struct vm_area_struct; * can be cleared when the reclaiming of pages would cause unnecessary * disruption. */ -#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ @@ -126,12 +126,12 @@ struct vm_area_struct; */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -#define GFP_NOIO (__GFP_WAIT) -#define GFP_NOFS (__GFP_WAIT | __GFP_IO) -#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ __GFP_RECLAIMABLE) -#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ @@ -143,12 +143,12 @@ struct vm_area_struct; #define GFP_MOVABLE_SHIFT 3 /* Control page allocator reclaim behavior */ -#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ +#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) /* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)) +#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) -- cgit v1.2.3 From f77cf4e4cc9d40310a7224a1a67c733aeec78836 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:31 -0800 Subject: mm, page_alloc: delete the zonelist_cache The zonelist cache (zlc) was introduced to skip over zones that were recently known to be full. This avoided expensive operations such as the cpuset checks, watermark calculations and zone_reclaim. The situation today is different and the complexity of zlc is harder to justify. 1) The cpuset checks are no-ops unless a cpuset is active and in general are a lot cheaper. 2) zone_reclaim is now disabled by default and I suspect that was a large source of the cost that zlc wanted to avoid. When it is enabled, it's known to be a major source of stalling when nodes fill up and it's unwise to hit every other user with the overhead. 3) Watermark checks are expensive to calculate for high-order allocation requests. Later patches in this series will reduce the cost of the watermark checking. 4) The most important issue is that in the current implementation it is possible for a failed THP allocation to mark a zone full for order-0 allocations and cause a fallback to remote nodes. The last issue could be addressed with additional complexity but as the benefit of zlc is questionable, it is better to remove it. If stalls due to zone_reclaim are ever reported then an alternative would be to introduce deferring logic based on a timeout inside zone_reclaim itself and leave the page allocator fast paths alone. The impact on page-allocator microbenchmarks is negligible as they don't hit the paths where the zlc comes into play. Most page-reclaim related workloads showed no noticeable difference as a result of the removal. The impact was noticeable in a workload called "stutter". One part uses a lot of anonymous memory, a second measures mmap latency and a third copies a large file. In an ideal world the latency application would not notice the mmap latency. On a 2-node machine the results of this patch are stutter 4.3.0-rc1 4.3.0-rc1 baseline nozlc-v4 Min mmap 20.9243 ( 0.00%) 20.7716 ( 0.73%) 1st-qrtle mmap 22.0612 ( 0.00%) 22.0680 ( -0.03%) 2nd-qrtle mmap 22.3291 ( 0.00%) 22.3809 ( -0.23%) 3rd-qrtle mmap 25.2244 ( 0.00%) 25.2396 ( -0.06%) Max-90% mmap 48.0995 ( 0.00%) 28.3713 ( 41.02%) Max-93% mmap 52.5557 ( 0.00%) 36.0170 ( 31.47%) Max-95% mmap 55.8173 ( 0.00%) 47.3163 ( 15.23%) Max-99% mmap 67.3781 ( 0.00%) 70.1140 ( -4.06%) Max mmap 24447.6375 ( 0.00%) 12915.1356 ( 47.17%) Mean mmap 33.7883 ( 0.00%) 27.7944 ( 17.74%) Best99%Mean mmap 27.7825 ( 0.00%) 25.2767 ( 9.02%) Best95%Mean mmap 26.3912 ( 0.00%) 23.7994 ( 9.82%) Best90%Mean mmap 24.9886 ( 0.00%) 23.2251 ( 7.06%) Best50%Mean mmap 22.0157 ( 0.00%) 22.0261 ( -0.05%) Best10%Mean mmap 21.6705 ( 0.00%) 21.6083 ( 0.29%) Best5%Mean mmap 21.5581 ( 0.00%) 21.4611 ( 0.45%) Best1%Mean mmap 21.3079 ( 0.00%) 21.1631 ( 0.68%) Note that the maximum stall latency went from 24 seconds to 12 which is still bad but an improvement. The milage varies considerably 2-node machine on an earlier test went from 494 seconds to 47 seconds and a 4-node machine that tested an earlier version of this patch went from a worst case stall time of 6 seconds to 67ms. The nature of the benchmark is inherently unpredictable as it is hammering the system and the milage will vary between machines. There is a secondary impact with potentially more direct reclaim because zones are now being considered instead of being skipped by zlc. In this particular test run it did not occur so will not be described. However, in at least one test the following was observed 1. Direct reclaim rates were higher. This was likely due to direct reclaim being entered instead of the zlc disabling a zone and busy looping. Busy looping may have the effect of allowing kswapd to make more progress and in some cases may be better overall. If this is found then the correct action is to put direct reclaimers to sleep on a waitqueue and allow kswapd make forward progress. Busy looping on the zlc is even worse than when the allocator used to blindly call congestion_wait(). 2. There was higher swap activity as direct reclaim was active. 3. Direct reclaim efficiency was lower. This is related to 1 as more scanning activity also encountered more pages that could not be immediately reclaimed In that case, the direct page scan and reclaim rates are noticeable but it is not considered a problem for a few reasons 1. The test is primarily concerned with latency. The mmap attempts are also faulted which means there are THP allocation requests. The ZLC could cause zones to be disabled causing the process to busy loop instead of reclaiming. This looks like elevated direct reclaim activity but it's the correct action to take based on what processes requested. 2. The test hammers reclaim and compaction heavily. The number of successful THP faults is highly variable but affects the reclaim stats. It's not a realistic or reasonable measure of page reclaim activity. 3. No other page-reclaim intensive workload that was tested showed a problem. 4. If a workload is identified that benefitted from the busy looping then it should be fixed by having direct reclaimers sleep on a wait queue until woken by kswapd instead of busy looping. We had this class of problem before when congestion_waits() with a fixed timeout was a brain damaged decision but happened to benefit some workloads. If a workload is identified that relied on the zlc to busy loop then it should be fixed correctly and have a direct reclaimer sleep on a waitqueue until woken by kswapd. Signed-off-by: Mel Gorman Acked-by: David Rientjes Acked-by: Christoph Lameter Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 74 -------------------------------------------------- 1 file changed, 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 38bed71758ab..1e88aae329ff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -589,75 +589,8 @@ static inline bool zone_is_empty(struct zone *zone) * [1] : No fallback (__GFP_THISNODE) */ #define MAX_ZONELISTS 2 - - -/* - * We cache key information from each zonelist for smaller cache - * footprint when scanning for free pages in get_page_from_freelist(). - * - * 1) The BITMAP fullzones tracks which zones in a zonelist have come - * up short of free memory since the last time (last_fullzone_zap) - * we zero'd fullzones. - * 2) The array z_to_n[] maps each zone in the zonelist to its node - * id, so that we can efficiently evaluate whether that node is - * set in the current tasks mems_allowed. - * - * Both fullzones and z_to_n[] are one-to-one with the zonelist, - * indexed by a zones offset in the zonelist zones[] array. - * - * The get_page_from_freelist() routine does two scans. During the - * first scan, we skip zones whose corresponding bit in 'fullzones' - * is set or whose corresponding node in current->mems_allowed (which - * comes from cpusets) is not set. During the second scan, we bypass - * this zonelist_cache, to ensure we look methodically at each zone. - * - * Once per second, we zero out (zap) fullzones, forcing us to - * reconsider nodes that might have regained more free memory. - * The field last_full_zap is the time we last zapped fullzones. - * - * This mechanism reduces the amount of time we waste repeatedly - * reexaming zones for free memory when they just came up low on - * memory momentarilly ago. - * - * The zonelist_cache struct members logically belong in struct - * zonelist. However, the mempolicy zonelists constructed for - * MPOL_BIND are intentionally variable length (and usually much - * shorter). A general purpose mechanism for handling structs with - * multiple variable length members is more mechanism than we want - * here. We resort to some special case hackery instead. - * - * The MPOL_BIND zonelists don't need this zonelist_cache (in good - * part because they are shorter), so we put the fixed length stuff - * at the front of the zonelist struct, ending in a variable length - * zones[], as is needed by MPOL_BIND. - * - * Then we put the optional zonelist cache on the end of the zonelist - * struct. This optional stuff is found by a 'zlcache_ptr' pointer in - * the fixed length portion at the front of the struct. This pointer - * both enables us to find the zonelist cache, and in the case of - * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL) - * to know that the zonelist cache is not there. - * - * The end result is that struct zonelists come in two flavors: - * 1) The full, fixed length version, shown below, and - * 2) The custom zonelists for MPOL_BIND. - * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache. - * - * Even though there may be multiple CPU cores on a node modifying - * fullzones or last_full_zap in the same zonelist_cache at the same - * time, we don't lock it. This is just hint data - if it is wrong now - * and then, the allocator will still function, perhaps a bit slower. - */ - - -struct zonelist_cache { - unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */ - DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */ - unsigned long last_full_zap; /* when last zap'd (jiffies) */ -}; #else #define MAX_ZONELISTS 1 -struct zonelist_cache; #endif /* @@ -675,9 +608,6 @@ struct zoneref { * allocation, the other zones are fallback zones, in decreasing * priority. * - * If zlcache_ptr is not NULL, then it is just the address of zlcache, - * as explained above. If zlcache_ptr is NULL, there is no zlcache. - * * * To speed the reading of the zonelist, the zonerefs contain the zone index * of the entry being read. Helper functions to access information given * a struct zoneref are @@ -687,11 +617,7 @@ struct zoneref { * zonelist_node_idx() - Return the index of the node for an entry */ struct zonelist { - struct zonelist_cache *zlcache_ptr; // NULL or &zlcache struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; -#ifdef CONFIG_NUMA - struct zonelist_cache zlcache; // optional ... -#endif }; #ifndef CONFIG_DISCONTIGMEM -- cgit v1.2.3 From 974a786e63c96a2401a78ddba926f34c128474f1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:34 -0800 Subject: mm, page_alloc: remove MIGRATE_RESERVE MIGRATE_RESERVE preserves an old property of the buddy allocator that existed prior to fragmentation avoidance -- min_free_kbytes worth of pages tended to remain contiguous until the only alternative was to fail the allocation. At the time it was discovered that high-order atomic allocations relied on this property so MIGRATE_RESERVE was introduced. A later patch will introduce an alternative MIGRATE_HIGHATOMIC so this patch deletes MIGRATE_RESERVE and supporting code so it'll be easier to review. Note that this patch in isolation may look like a false regression if someone was bisecting high-order atomic allocation failures. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1e88aae329ff..b86cfa3313cf 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -39,8 +39,6 @@ enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, - MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ - MIGRATE_RESERVE = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way @@ -63,6 +61,8 @@ enum { MIGRATE_TYPES }; +#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) + #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else @@ -429,12 +429,6 @@ struct zone { const char *name; - /* - * Number of MIGRATE_RESERVE page block. To maintain for just - * optimization. Protected by zone->lock. - */ - int nr_migrate_reserve_block; - #ifdef CONFIG_MEMORY_ISOLATION /* * Number of isolated pageblock. It is used to solve incorrect -- cgit v1.2.3 From 0aaa29a56e4fb0fc9e24edb649e2733a672ca099 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:37 -0800 Subject: mm, page_alloc: reserve pageblocks for high-order atomic allocations on demand High-order watermark checking exists for two reasons -- kswapd high-order awareness and protection for high-order atomic requests. Historically the kernel depended on MIGRATE_RESERVE to preserve min_free_kbytes as high-order free pages for as long as possible. This patch introduces MIGRATE_HIGHATOMIC that reserves pageblocks for high-order atomic allocations on demand and avoids using those blocks for order-0 allocations. This is more flexible and reliable than MIGRATE_RESERVE was. A MIGRATE_HIGHORDER pageblock is created when an atomic high-order allocation request steals a pageblock but limits the total number to 1% of the zone. Callers that speculatively abuse atomic allocations for long-lived high-order allocations to access the reserve will quickly fail. Note that SLUB is currently not such an abuser as it reclaims at least once. It is possible that the pageblock stolen has few suitable high-order pages and will need to steal again in the near future but there would need to be strong justification to search all pageblocks for an ideal candidate. The pageblocks are unreserved if an allocation fails after a direct reclaim attempt. The watermark checks account for the reserved pageblocks when the allocation request is not a high-order atomic allocation. The reserved pageblocks can not be used for order-0 allocations. This may allow temporary wastage until a failed reclaim reassigns the pageblock. This is deliberate as the intent of the reservation is to satisfy a limited number of atomic high-order short-lived requests if the system requires them. The stutter benchmark was used to evaluate this but while it was running there was a systemtap script that randomly allocated between 1 high-order page and 12.5% of memory's worth of order-3 pages using GFP_ATOMIC. This is much larger than the potential reserve and it does not attempt to be realistic. It is intended to stress random high-order allocations from an unknown source, show that there is a reduction in failures without introducing an anomaly where atomic allocations are more reliable than regular allocations. The amount of memory reserved varied throughout the workload as reserves were created and reclaimed under memory pressure. The allocation failures once the workload warmed up were as follows; 4.2-rc5-vanilla 70% 4.2-rc5-atomic-reserve 56% The failure rate was also measured while building multiple kernels. The failure rate was 14% but is 6% with this patch applied. Overall, this is a small reduction but the reserves are small relative to the number of allocation requests. In early versions of the patch, the failure rate reduced by a much larger amount but that required much larger reserves and perversely made atomic allocations seem more reliable than regular allocations. [yalin.wang2010@gmail.com: fix redundant check and a memory leak] Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: yalin wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b86cfa3313cf..d3bafe4ff32b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -39,6 +39,8 @@ enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, + MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ + MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way @@ -61,8 +63,6 @@ enum { MIGRATE_TYPES }; -#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) - #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else @@ -334,6 +334,8 @@ struct zone { /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; + unsigned long nr_reserved_highatomic; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several -- cgit v1.2.3 From dd56b046426760aa0c852ad6e4b6b07891222d65 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:43 -0800 Subject: mm: page_alloc: hide some GFP internals and document the bits and flag combinations Andrew stated the following We have quite a history of remote parts of the kernel using weird/wrong/inexplicable combinations of __GFP_ flags. I tend to think that this is because we didn't adequately explain the interface. And I don't think that gfp.h really improved much in this area as a result of this patchset. Could you go through it some time and decide if we've adequately documented all this stuff? This patches first moves some GFP flag combinations that are part of the MM internals to mm/internal.h. The rest of the patch documents the __GFP_FOO bits under various headings and then documents the flag combinations. It will not help callers that are brain damaged but the clarity might motivate some fixes and avoid future mistakes. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Rik van Riel Cc: Vlastimil Babka Cc: David Rientjes Cc: Joonsoo Kim Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 251 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 171 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 369227202ac2..6523109e136d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,9 +39,7 @@ struct vm_area_struct; /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* - * GFP bitmasks.. - * - * Zone modifiers (see linux/mmzone.h - low three bits) + * Physical address zone modifiers (see linux/mmzone.h - low four bits) * * Do not put any conditional on these. If necessary modify the definitions * without the underscores and use them consistently. The definitions here may @@ -51,120 +49,211 @@ struct vm_area_struct; #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + /* - * Action modifiers - doesn't change the zoning + * Page mobility and placement hints * - * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt - * _might_ fail. This depends upon the particular VM implementation. + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. * - * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. New users should be evaluated carefully - * (and the flag should be used only when there is no reasonable failure policy) - * but it is definitely preferable to use the flag rather than opencode endless - * loop around allocator. + * __GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. * - * __GFP_NORETRY: The VM implementation must not retry indefinitely and will - * return NULL when direct reclaim and memory compaction have failed to allow - * the allocation to succeed. The OOM killer is not called with the current - * implementation. + * __GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * __GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). * - * __GFP_MOVABLE: Flag that this page will be movable by the page migration - * mechanism or reclaimed + * __GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * __GFP_THISNODE forces the allocation to be satisified from the requested + * node with no fallbacks or placement policy enforcements. */ -#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) /* Caller cannot wait or reschedule */ -#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ -#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ -#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ -#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */ -#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */ -#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */ -#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */ -#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */ -#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */ -#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */ -#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */ -#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves. - * This takes precedence over the - * __GFP_MEMALLOC flag if both are - * set - */ -#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */ -#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */ -#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ -#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */ -#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ - -#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ -#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) /* - * A caller that is willing to wait may enter direct reclaim and will - * wake kswapd to reclaim pages in the background until the high - * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to - * avoid unnecessary delays when a fallback option is available but - * still allow kswapd to reclaim in the background. The kswapd flag - * can be cleared when the reclaiming of pages would cause unnecessary - * disruption. + * Watermark modifiers -- controls access to emergency reserves + * + * __GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example, creating an IO context to clean pages. + * + * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is + * high priority. Users are typically interrupt handlers. This may be + * used in conjunction with __GFP_HIGH + * + * __GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * + * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the __GFP_MEMALLOC flag if both are set. + * + * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement. */ -#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) +#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) + +/* + * Reclaim modifiers + * + * __GFP_IO can start physical IO. + * + * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt + * _might_ fail. This depends upon the particular VM implementation. + * + * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. New users should be evaluated carefully + * (and the flag should be used only when there is no reasonable failure + * policy) but it is definitely preferable to use the flag rather than + * opencode endless loop around allocator. + * + * __GFP_NORETRY: The VM implementation must not retry indefinitely and will + * return NULL when direct reclaim and memory compaction have failed to allow + * the allocation to succeed. The OOM killer is not called with the current + * implementation. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* - * This may seem redundant, but it's a way of annotating false positives vs. - * allocations that simply cannot be supported (e.g. page tables). + * Action modifiers + * + * __GFP_COLD indicates that the caller does not expect to be used in the near + * future. Where possible, a cache-cold page will be returned. + * + * __GFP_NOWARN suppresses allocation failure reports. + * + * __GFP_COMP address compound page metadata. + * + * __GFP_ZERO returns a zeroed page on success. + * + * __GFP_NOTRACK avoids tracking with kmemcheck. + * + * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of + * distinguishing in the source between false positives and allocations that + * cannot be supported (e.g. page tables). + * + * __GFP_OTHER_NODE is for allocations that are on a remote node but that + * should not be accounted for as a remote allocation in vmstat. A + * typical user would be khugepaged collapsing a huge page on a remote + * node. */ +#define __GFP_COLD ((__force gfp_t)___GFP_COLD) +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) +#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) -#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */ +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 26 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* - * GFP_ATOMIC callers can not sleep, need the allocation to succeed. - * A lower watermark is applied to allow access to "atomic reserves" + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * __GFP_FOO flags as necessary. + * + * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves" + * + * GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. + * + * GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * + * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * + * GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit + * address. + * + * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE. + * + * GFP_TRANSHUGE is used for THP allocations. They are compound allocations + * that will fail quickly if memory is not available and will not wake + * kswapd on failure. */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) -#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ __GFP_RECLAIMABLE) #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ ~__GFP_KSWAPD_RECLAIM) -/* This mask makes up all the page movable related flags */ +/* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) #define GFP_MOVABLE_SHIFT 3 -/* Control page allocator reclaim behavior */ -#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ - __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ - __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) - -/* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) - -/* Control allocation constraints */ -#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) - -/* Do not use these with a slab allocator */ -#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) - -/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some - platforms, used as appropriate on others */ - -#define GFP_DMA __GFP_DMA - -/* 4GB DMA on some platforms */ -#define GFP_DMA32 __GFP_DMA32 - -/* Convert GFP flags to their corresponding migrate type */ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) { VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); @@ -177,6 +266,8 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) /* Group based on mobility */ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } +#undef GFP_MOVABLE_MASK +#undef GFP_MOVABLE_SHIFT static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { -- cgit v1.2.3 From 89903327607232de32f05100cf03f9390b858e0b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 6 Nov 2015 16:28:46 -0800 Subject: include/linux/mmzone.h: reflow comment Someone has an 86 column display. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d3bafe4ff32b..e23a9e704536 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -337,12 +337,13 @@ struct zone { unsigned long nr_reserved_highatomic; /* - * We don't know if the memory that we're going to allocate will be freeable - * or/and it will be released eventually, so to avoid totally wasting several - * GB of ram we must reserve some of the lower zone memory (otherwise we risk - * to run OOM on the lower zones despite there's tons of freeable ram - * on the higher zones). This array is recalculated at runtime if the - * sysctl_lowmem_reserve_ratio sysctl changes. + * We don't know if the memory that we're going to allocate will be + * freeable or/and it will be released eventually, so to avoid totally + * wasting several GB of ram we must reserve some of the lower zone + * memory (otherwise we risk to run OOM on the lower zones despite + * there being tons of freeable ram on the higher zones). This array is + * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl + * changes. */ long lowmem_reserve[MAX_NR_ZONES]; -- cgit v1.2.3 From c62d25556be6c965dc14288e796a576e8e39a7e9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 6 Nov 2015 16:28:49 -0800 Subject: mm, fs: introduce mapping_gfp_constraint() There are many places which use mapping_gfp_mask to restrict a more generic gfp mask which would be used for allocations which are not directly related to the page cache but they are performed in the same context. Let's introduce a helper function which makes the restriction explicit and easier to track. This patch doesn't introduce any functional changes. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a6c78e00ea96..26eabf5ec718 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -69,6 +69,13 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping) return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; } +/* Restricts the given gfp_mask to what the mapping allows. */ +static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, + gfp_t gfp_mask) +{ + return mapping_gfp_mask(mapping) & gfp_mask; +} + /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... -- cgit v1.2.3 From 3d9c637f4ae74b45d95bb6cbd793fbffad0a709c Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 6 Nov 2015 16:29:12 -0800 Subject: module: export param_free_charp() Change the param_free_charp() function from static to exported. It is used by zswap in the next patch ("zswap: use charp for zswap param strings"). Signed-off-by: Dan Streetman Acked-by: Rusty Russell Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/moduleparam.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index c12f2147c350..52666d90ca94 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -386,6 +386,7 @@ extern int param_get_ullong(char *buffer, const struct kernel_param *kp); extern const struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); +extern void param_free_charp(void *arg); #define param_check_charp(name, p) __param_check(name, p, char *) /* We used to allow int as well as bool. We're taking that away! */ -- cgit v1.2.3 From 69e18f4dbedfbf208452e9da9979c92da30d2442 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 6 Nov 2015 16:29:18 -0800 Subject: zpool: remove redundant zpool->type string, const-ify zpool_get_type Make the return type of zpool_get_type const; the string belongs to the zpool driver and should not be modified. Remove the redundant type field in the struct zpool; it is private to zpool.c and isn't needed since ->driver->type can be used directly. Add comments indicating strings must be null-terminated. Signed-off-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Seth Jennings Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 42f8ec992452..1f405bee3cd5 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -41,7 +41,7 @@ bool zpool_has_pool(char *type); struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, const struct zpool_ops *ops); -char *zpool_get_type(struct zpool *pool); +const char *zpool_get_type(struct zpool *pool); void zpool_destroy_pool(struct zpool *pool); -- cgit v1.2.3 From 6f3526d6db7cbe8b53e42d6bf0cad2072afcf3fe Mon Sep 17 00:00:00 2001 From: Sergey SENOZHATSKY Date: Fri, 6 Nov 2015 16:29:21 -0800 Subject: mm: zsmalloc: constify struct zs_pool name Constify `struct zs_pool' ->name. [akpm@inux-foundation.org: constify zpool_create_pool()'s `type' arg also] Signed-off-by: Sergey Senozhatsky Acked-by: Dan Streetman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 6 ++++-- include/linux/zsmalloc.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 1f405bee3cd5..2e97b7707dff 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -38,7 +38,7 @@ enum zpool_mapmode { bool zpool_has_pool(char *type); -struct zpool *zpool_create_pool(char *type, char *name, +struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, const struct zpool_ops *ops); const char *zpool_get_type(struct zpool *pool); @@ -83,7 +83,9 @@ struct zpool_driver { atomic_t refcount; struct list_head list; - void *(*create)(char *name, gfp_t gfp, const struct zpool_ops *ops, + void *(*create)(const char *name, + gfp_t gfp, + const struct zpool_ops *ops, struct zpool *zpool); void (*destroy)(void *pool); diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 6398dfae53f1..34eb16098a33 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -41,7 +41,7 @@ struct zs_pool_stats { struct zs_pool; -struct zs_pool *zs_create_pool(char *name, gfp_t flags); +struct zs_pool *zs_create_pool(const char *name, gfp_t flags); void zs_destroy_pool(struct zs_pool *pool); unsigned long zs_malloc(struct zs_pool *pool, size_t size); -- cgit v1.2.3 From 474e4eeaf26b6c3298ca3ae9d0a705b0853efb2a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:40 -0800 Subject: mm: drop page->slab_page Since 8456a648cf44 ("slab: use struct page for slab management") nobody uses slab_page field in struct page. Let's drop it. Signed-off-by: Kirill A. Shutemov Acked-by: Christoph Lameter Acked-by: David Rientjes Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: Joonsoo Kim Cc: Andi Kleen Cc: "Paul E. McKenney" Cc: Aneesh Kumar K.V Cc: Hugh Dickins Cc: Michal Hocko Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0a85da25a822..c0ec46df6c13 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -131,7 +131,6 @@ struct page { #endif }; - struct slab *slab_page; /* slab fields */ struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ -- cgit v1.2.3 From f1e61557f0230d51a3df8d825f2c156e75563bff Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:50 -0800 Subject: mm: pack compound_dtor and compound_order into one word in struct page The patch halves space occupied by compound_dtor and compound_order in struct page. For compound_order, it's trivial long -> short conversion. For get_compound_page_dtor(), we now use hardcoded table for destructor lookup and store its index in the struct page instead of direct pointer to destructor. It shouldn't be a big trouble to maintain the table: we have only two destructor and NULL currently. This patch free up one word in tail pages for reuse. This is preparation for the next patch. Signed-off-by: Kirill A. Shutemov Reviewed-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 +++++++++++++++++++----- include/linux/mm_types.h | 6 ++---- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 906c46a05707..6581c21320cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -568,18 +568,32 @@ int split_free_page(struct page *page); /* * Compound pages have a destructor function. Provide a * prototype for that function and accessor functions. - * These are _only_ valid on the head of a PG_compound page. + * These are _only_ valid on the head of a compound page. */ +typedef void compound_page_dtor(struct page *); + +/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */ +enum compound_dtor_id { + NULL_COMPOUND_DTOR, + COMPOUND_PAGE_DTOR, +#ifdef CONFIG_HUGETLB_PAGE + HUGETLB_PAGE_DTOR, +#endif + NR_COMPOUND_DTORS, +}; +extern compound_page_dtor * const compound_page_dtors[]; static inline void set_compound_page_dtor(struct page *page, - compound_page_dtor *dtor) + enum compound_dtor_id compound_dtor) { - page[1].compound_dtor = dtor; + VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page); + page[1].compound_dtor = compound_dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) { - return page[1].compound_dtor; + VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); + return compound_page_dtors[page[1].compound_dtor]; } static inline int compound_order(struct page *page) @@ -589,7 +603,7 @@ static inline int compound_order(struct page *page) return page[1].compound_order; } -static inline void set_compound_order(struct page *page, unsigned long order) +static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c0ec46df6c13..e334ef79cb43 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -28,8 +28,6 @@ struct mem_cgroup; IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) -typedef void compound_page_dtor(struct page *); - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -136,8 +134,8 @@ struct page { */ /* First tail page of compound page */ struct { - compound_page_dtor *compound_dtor; - unsigned long compound_order; + unsigned short int compound_dtor; + unsigned short int compound_order; }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS -- cgit v1.2.3 From 1d798ca3f16437c71ff63e36597ff07f9c12e4d6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:54 -0800 Subject: mm: make compound_head() robust Hugh has pointed that compound_head() call can be unsafe in some context. There's one example: CPU0 CPU1 isolate_migratepages_block() page_count() compound_head() !!PageTail() == true put_page() tail->first_page = NULL head = tail->first_page alloc_pages(__GFP_COMP) prep_compound_page() tail->first_page = head __SetPageTail(p); !!PageTail() == true The race is pure theoretical. I don't it's possible to trigger it in practice. But who knows. We can fix the race by changing how encode PageTail() and compound_head() within struct page to be able to update them in one shot. The patch introduces page->compound_head into third double word block in front of compound_dtor and compound_order. Bit 0 encodes PageTail() and the rest bits are pointer to head page if bit zero is set. The patch moves page->pmd_huge_pte out of word, just in case if an architecture defines pgtable_t into something what can have the bit 0 set. hugetlb_cgroup uses page->lru.next in the second tail page to store pointer struct hugetlb_cgroup. The patch switch it to use page->private in the second tail page instead. The space is free since ->first_page is removed from the union. The patch also opens possibility to remove HUGETLB_CGROUP_MIN_ORDER limitation, since there's now space in first tail page to store struct hugetlb_cgroup pointer. But that's out of scope of the patch. That means page->compound_head shares storage space with: - page->lru.next; - page->next; - page->rcu_head.next; That's too long list to be absolutely sure, but looks like nobody uses bit 0 of the word. page->rcu_head.next guaranteed[1] to have bit 0 clean as long as we use call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu(). But future call_rcu_lazy() is not allowed as it makes use of the bit and we can get false positive PageTail(). [1] http://lkml.kernel.org/g/20150827163634.GD4029@linux.vnet.ibm.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: Hugh Dickins Cc: David Rientjes Cc: Vlastimil Babka Acked-by: Paul E. McKenney Cc: Aneesh Kumar K.V Cc: Andi Kleen Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb_cgroup.h | 4 +-- include/linux/mm.h | 53 ++-------------------------- include/linux/mm_types.h | 22 +++++++++--- include/linux/page-flags.h | 80 ++++++++++-------------------------------- 4 files changed, 41 insertions(+), 118 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 7edd30515298..24154c26d469 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -32,7 +32,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - return (struct hugetlb_cgroup *)page[2].lru.next; + return (struct hugetlb_cgroup *)page[2].private; } static inline @@ -42,7 +42,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; - page[2].lru.next = (void *)h_cg; + page[2].private = (unsigned long)h_cg; return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6581c21320cb..9671b6f23eda 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -430,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page, #endif } -static inline struct page *compound_head_by_tail(struct page *tail) -{ - struct page *head = tail->first_page; - - /* - * page->first_page may be a dangling pointer to an old - * compound page, so recheck that it is still a tail - * page before returning. - */ - smp_rmb(); - if (likely(PageTail(tail))) - return head; - return tail; -} - -/* - * Since either compound page could be dismantled asynchronously in THP - * or we access asynchronously arbitrary positioned struct page, there - * would be tail flag race. To handle this race, we should call - * smp_rmb() before checking tail flag. compound_head_by_tail() did it. - */ -static inline struct page *compound_head(struct page *page) -{ - if (unlikely(PageTail(page))) - return compound_head_by_tail(page); - return page; -} - -/* - * If we access compound page synchronously such as access to - * allocated page, there is no need to handle tail flag race, so we can - * check tail flag directly without any synchronization primitive. - */ -static inline struct page *compound_head_fast(struct page *page) -{ - if (unlikely(PageTail(page))) - return page->first_page; - return page; -} - /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test @@ -518,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page) VM_BUG_ON_PAGE(!PageTail(page), page); VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); - if (compound_tail_refcounted(page->first_page)) + if (compound_tail_refcounted(compound_head(page))) atomic_inc(&page->_mapcount); } @@ -541,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); - /* - * We don't need to worry about synchronization of tail flag - * when we call virt_to_head_page() since it is only called for - * already allocated page and this page won't be freed until - * this virt_to_head_page() is finished. So use _fast variant. - */ - return compound_head_fast(page); + return compound_head(page); } /* @@ -1586,8 +1540,7 @@ static inline bool ptlock_init(struct page *page) * with 0. Make sure nobody took it in use in between. * * It can happen if arch try to use slab for page table allocation: - * slab code uses page->slab_cache and page->first_page (for tail - * pages), which share storage with page->ptl. + * slab code uses page->slab_cache, which share storage with page->ptl. */ VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); if (!ptlock_alloc(page)) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e334ef79cb43..bb91658c603f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -111,7 +111,13 @@ struct page { }; }; - /* Third double word block */ + /* + * Third double word block + * + * WARNING: bit 0 of the first word encode PageTail(). That means + * the rest users of the storage space MUST NOT use the bit to + * avoid collision and false-positive PageTail(). + */ union { struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! @@ -132,14 +138,23 @@ struct page { struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ - /* First tail page of compound page */ + /* Tail pages of compound page */ struct { + unsigned long compound_head; /* If bit zero is set */ + + /* First tail page only */ unsigned short int compound_dtor; unsigned short int compound_order; }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ + struct { + unsigned long __pad; /* do not overlay pmd_huge_pte + * with compound_head to avoid + * possible bit 0 collision. + */ + pgtable_t pmd_huge_pte; /* protected by page->ptl */ + }; #endif }; @@ -160,7 +175,6 @@ struct page { #endif #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ - struct page *first_page; /* Compound tail pages */ }; #ifdef CONFIG_MEMCG diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a525e5067484..bb53c7b86315 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -86,12 +86,7 @@ enum pageflags { PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_writeback, /* Page is under writeback */ -#ifdef CONFIG_PAGEFLAGS_EXTENDED PG_head, /* A head page */ - PG_tail, /* A tail page */ -#else - PG_compound, /* A compound page */ -#endif PG_swapcache, /* Swap page: swp_entry_t in private */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ @@ -398,85 +393,46 @@ static inline void set_page_writeback_keepwrite(struct page *page) test_set_page_writeback_keepwrite(page); } -#ifdef CONFIG_PAGEFLAGS_EXTENDED -/* - * System with lots of page flags available. This allows separate - * flags for PageHead() and PageTail() checks of compound pages so that bit - * tests can be used in performance sensitive paths. PageCompound is - * generally not used in hot code paths except arch/powerpc/mm/init_64.c - * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages - * and avoid handling those in real mode. - */ __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) -__PAGEFLAG(Tail, tail) -static inline int PageCompound(struct page *page) -{ - return page->flags & ((1L << PG_head) | (1L << PG_tail)); - -} -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void ClearPageCompound(struct page *page) +static inline int PageTail(struct page *page) { - BUG_ON(!PageHead(page)); - ClearPageHead(page); + return READ_ONCE(page->compound_head) & 1; } -#endif - -#define PG_head_mask ((1L << PG_head)) -#else -/* - * Reduce page flag use as much as possible by overlapping - * compound page flags with the flags used for page cache pages. Possible - * because PageCompound is always set for compound pages and not for - * pages on the LRU and/or pagecache. - */ -TESTPAGEFLAG(Compound, compound) -__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound) - -/* - * PG_reclaim is used in combination with PG_compound to mark the - * head and tail of a compound page. This saves one page flag - * but makes it impossible to use compound pages for the page cache. - * The PG_reclaim bit would have to be used for reclaim or readahead - * if compound pages enter the page cache. - * - * PG_compound & PG_reclaim => Tail page - * PG_compound & ~PG_reclaim => Head page - */ -#define PG_head_mask ((1L << PG_compound)) -#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) - -static inline int PageHead(struct page *page) +static inline void set_compound_head(struct page *page, struct page *head) { - return ((page->flags & PG_head_tail_mask) == PG_head_mask); + WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } -static inline int PageTail(struct page *page) +static inline void clear_compound_head(struct page *page) { - return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); + WRITE_ONCE(page->compound_head, 0); } -static inline void __SetPageTail(struct page *page) +static inline struct page *compound_head(struct page *page) { - page->flags |= PG_head_tail_mask; + unsigned long head = READ_ONCE(page->compound_head); + + if (unlikely(head & 1)) + return (struct page *) (head - 1); + return page; } -static inline void __ClearPageTail(struct page *page) +static inline int PageCompound(struct page *page) { - page->flags &= ~PG_head_tail_mask; -} + return PageHead(page) || PageTail(page); +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { - BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound)); - clear_bit(PG_compound, &page->flags); + BUG_ON(!PageHead(page)); + ClearPageHead(page); } #endif -#endif /* !PAGEFLAGS_EXTENDED */ +#define PG_head_mask ((1L << PG_head)) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); -- cgit v1.2.3 From d00181b96eb86c914cb327d1de974a1b71366e1b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:57 -0800 Subject: mm: use 'unsigned int' for page order Let's try to be consistent about data type of page order. [sfr@canb.auug.org.au: fix build (type of pageblock_order)] [hughd@google.com: some configs end up with MAX_ORDER and pageblock_order having different types] Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Stephen Rothwell Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++-- include/linux/pageblock-flags.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9671b6f23eda..00bad7793788 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -550,7 +550,7 @@ static inline compound_page_dtor *get_compound_page_dtor(struct page *page) return compound_page_dtors[page[1].compound_dtor]; } -static inline int compound_order(struct page *page) +static inline unsigned int compound_order(struct page *page) { if (!PageHead(page)) return 0; @@ -1810,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern __printf(3, 4) -void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); +void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, + const char *fmt, ...); extern void setup_per_cpu_pageset(void); diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 2baeee12f48e..e942558b3585 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -44,7 +44,7 @@ enum pageblock_bits { #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE /* Huge page sizes are variable */ -extern int pageblock_order; +extern unsigned int pageblock_order; #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ -- cgit v1.2.3 From 1965c8b7ac7dd147663faf77a66a693ac3ddcb85 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:30:00 -0800 Subject: mm: use 'unsigned int' for compound_dtor/compound_order on 64BIT On 64 bit system we have enough space in struct page to encode compound_dtor and compound_order with unsigned int. On x86-64 it leads to slightly smaller code size due usesage of plain MOV instead of MOVZX (zero-extended move) or similar effect. allyesconfig: text data bss dec hex filename 159520446 48146736 72196096 279863278 10ae5fee vmlinux.pre 159520382 48146736 72196096 279863214 10ae5fae vmlinux.post On other architectures without native support of 16-bit data types the Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Sergey Senozhatsky Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bb91658c603f..f8d1492a114f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -143,8 +143,19 @@ struct page { unsigned long compound_head; /* If bit zero is set */ /* First tail page only */ +#ifdef CONFIG_64BIT + /* + * On 64 bit system we have enough space in struct page + * to encode compound_dtor and compound_order with + * unsigned int. It can help compiler generate better or + * smaller code on some archtectures. + */ + unsigned int compound_dtor; + unsigned int compound_order; +#else unsigned short int compound_dtor; unsigned short int compound_order; +#endif }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS -- cgit v1.2.3 From 9add850c211a39d5ab1a091d48795e21599a73d0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 6 Nov 2015 16:30:09 -0800 Subject: include/linux/compiler-gcc.h: improve __visible documentation Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0e3110a0b771..22ab246feed3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -205,7 +205,10 @@ #if GCC_VERSION >= 40600 /* - * Tell the optimizer that something else uses this function or variable. + * When used with Link Time Optimization, gcc can optimize away C functions or + * variables which are referenced only from assembly code. __visible tells the + * optimizer that something else uses this function or variable, thus preventing + * this. */ #define __visible __attribute__((externally_visible)) #endif -- cgit v1.2.3 From e2eb53aa96754b97d158eff884dde88abbad925e Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Fri, 6 Nov 2015 16:30:58 -0800 Subject: bitops.h: improve sign_extend32()'s documentation It is often overlooked that sign_extend32(), despite its name, is safe to use for 16 and 8 bit types as well. This should help prevent sign extension being done manually some other way. Signed-off-by: Martin Kepplinger Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: George Spelvin Cc: Rasmus Villemoes Cc: Maxime Coquelin Cc: Denys Vlasenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index e63553386ae7..5629923a8701 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -164,6 +164,8 @@ static inline __u8 ror8(__u8 word, unsigned int shift) * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<32) to sign bit + * + * This is safe to use for 16- and 8-bit types as well. */ static inline __s32 sign_extend32(__u32 value, int index) { -- cgit v1.2.3 From 48e203e21b29cd4b2c58403fe8bca68e2e854895 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Fri, 6 Nov 2015 16:31:02 -0800 Subject: bitops.h: add sign_extend64() Months back, this was discussed, see https://lkml.org/lkml/2015/1/18/289 The result was the 64-bit version being "likely fine", "valuable" and "correct". The discussion fell asleep but since there are possible users, let's add it. Signed-off-by: Martin Kepplinger Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: George Spelvin Cc: Rasmus Villemoes Cc: Maxime Coquelin Cc: Denys Vlasenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5629923a8701..2b8ed123ad36 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -173,6 +173,17 @@ static inline __s32 sign_extend32(__u32 value, int index) return (__s32)(value << shift) >> shift; } +/** + * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit + * @value: value to sign extend + * @index: 0 based bit index (0<=index<64) to sign bit + */ +static inline __s64 sign_extend64(__u64 value, int index) +{ + __u8 shift = 63 - index; + return (__s64)(value << shift) >> shift; +} + static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) -- cgit v1.2.3 From 0a9df786a6ae2f898114bdd242b64920dedf53bd Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:31:20 -0800 Subject: lib/kasprintf.c: introduce kvasprintf_const This adds kvasprintf_const which tries to use kstrdup_const if possible: If the format string contains no % characters, or if the format string is exactly "%s", we delegate to kstrdup_const. Otherwise, we fall back to kvasprintf. Just as for kstrdup_const, the main motivation is to save memory by reusing .rodata when possible. The return value should be freed by kfree_const, just like for kstrdup_const. There is deliberately no kasprintf_const: In the vast majority of cases, the format string argument is a literal, so one can determine statically whether one could instead use kstrdup_const directly (which would also require one to change all corresponding kfree calls to kfree_const). Signed-off-by: Rasmus Villemoes Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5582410727cb..2c13f747ac2e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -413,6 +413,8 @@ extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); extern __printf(2, 0) char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +extern __printf(2, 0) +const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); extern __scanf(2, 3) int sscanf(const char *, const char *, ...); -- cgit v1.2.3 From 8de1ee7ebfb4979c6444e81273e12e7a972c367d Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 6 Nov 2015 16:31:28 -0800 Subject: rbtree: clarify documentation of rbtree_postorder_for_each_entry_safe() I noticed that commit a20135ffbc44 ("writeback: don't drain bdi_writeback_congested on bdi destruction") added a usage of rbtree_postorder_for_each_entry_safe() in mm/backing-dev.c which appears to try to rb_erase() elements from an rbtree while iterating over it using rbtree_postorder_for_each_entry_safe(). Doing this will cause random nodes to be missed by the iteration because rb_erase() may rebalance the tree, changing the ordering that we're trying to iterate over. The previous documentation for rbtree_postorder_for_each_entry_safe() wasn't clear that this wasn't allowed, it was taken from the docs for list_for_each_entry_safe(), where erasing isn't a problem due to list_del() not reordering. Explicitly warn developers about this potential pit-fall. Note that I haven't fixed the actual issue that (it appears) the commit referenced above introduced (not familiar enough with that code). In general (and in this case), the patterns to follow are: - switch to rb_first() + rb_erase(), don't use rbtree_postorder_for_each_entry_safe(). - keep the postorder iteration and don't rb_erase() at all. Instead just clear the fields of rb_node & cgwb_congested_tree as required by other users of those structures. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Cody P Schafer Cc: John de la Garza Cc: Michel Lespinasse Cc: Peter Zijlstra Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 830c4992088d..a5aa7ae671f4 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -101,13 +101,21 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent }) /** - * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of - * given type safe against removal of rb_node entry + * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of + * given type allowing the backing memory of @pos to be invalidated * * @pos: the 'type *' to use as a loop cursor. * @n: another 'type *' to use as temporary storage * @root: 'rb_root *' of the rbtree. * @field: the name of the rb_node field within 'type'. + * + * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as + * list_for_each_entry_safe() and allows the iteration to continue independent + * of changes to @pos by the body of the loop. + * + * Note, however, that it cannot handle other modifications that re-order the + * rbtree it is iterating over. This includes calling rb_erase() on @pos, as + * rb_erase() may rebalance the tree, causing us to miss some nodes. */ #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ -- cgit v1.2.3 From 2e01fabe67ccaff1d59bda01e60a61f5fb0aa7b6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:19 -0800 Subject: signals: kill block_all_signals() and unblock_all_signals() It is hardly possible to enumerate all problems with block_all_signals() and unblock_all_signals(). Just for example, 1. block_all_signals(SIGSTOP/etc) simply can't help if the caller is multithreaded. Another thread can dequeue the signal and force the group stop. 2. Even is the caller is single-threaded, it will "stop" anyway. It will not sleep, but it will spin in kernel space until SIGCONT or SIGKILL. And a lot more. In short, this interface doesn't work at all, at least the last 10+ years. Daniel said: Yeah the only times I played around with the DRM_LOCK stuff was when old drivers accidentally deadlocked - my impression is that the entire DRM_LOCK thing was never really tested properly ;-) Hence I'm all for purging where this leaks out of the drm subsystem. Signed-off-by: Oleg Nesterov Acked-by: Daniel Vetter Acked-by: Dave Airlie Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eeb5066a44fb..923ec1a9b2b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1570,9 +1570,7 @@ struct task_struct { unsigned long sas_ss_sp; size_t sas_ss_size; - int (*notifier)(void *priv); - void *notifier_data; - sigset_t *notifier_mask; + struct callback_head *task_works; struct audit_context *audit_context; @@ -2476,9 +2474,6 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s return ret; } -extern void block_all_signals(int (*notifier)(void *priv), void *priv, - sigset_t *mask); -extern void unblock_all_signals(void); extern void release_task(struct task_struct * p); extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); -- cgit v1.2.3 From be0e6f290f78b84a3b21b8c8c46819c4514fe632 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:22 -0800 Subject: signal: turn dequeue_signal_lock() into kernel_dequeue_signal() 1. Rename dequeue_signal_lock() to kernel_dequeue_signal(). This matches another "for kthreads only" kernel_sigaction() helper. 2. Remove the "tsk" and "mask" arguments, they are always current and current->blocked. And it is simply wrong if tsk != current. 3. We could also remove the 3rd "siginfo_t *info" arg but it looks potentially useful. However we can simplify the callers if we change kernel_dequeue_signal() to accept info => NULL. 4. Remove _irqsave, it is never called from atomic context. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo Cc: David Woodhouse Cc: Felipe Balbi Cc: Markus Pargmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 923ec1a9b2b4..3d54924b4b86 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2462,14 +2462,15 @@ extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); -static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) +static inline int kernel_dequeue_signal(siginfo_t *info) { - unsigned long flags; + struct task_struct *tsk = current; + siginfo_t __info; int ret; - spin_lock_irqsave(&tsk->sighand->siglock, flags); - ret = dequeue_signal(tsk, mask, info); - spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); return ret; } -- cgit v1.2.3 From 9a13049e83f346cb1cbd60c64e520a73c396af16 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:25 -0800 Subject: signal: introduce kernel_signal_stop() to fix jffs2_garbage_collect_thread() jffs2_garbage_collect_thread() can race with SIGCONT and sleep in TASK_STOPPED state after it was already sent. Add the new helper, kernel_signal_stop(), which does this correctly. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo Cc: David Woodhouse Cc: Felipe Balbi Cc: Markus Pargmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d54924b4b86..4069febaa34a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2475,6 +2475,16 @@ static inline int kernel_dequeue_signal(siginfo_t *info) return ret; } +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(¤t->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +} + extern void release_task(struct task_struct * p); extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); -- cgit v1.2.3 From 002edb6f6f2a79bea50de11260ddc9572e6db731 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 6 Nov 2015 16:32:51 -0800 Subject: dma-mapping: tidy up dma_parms default handling Many DMA controllers and other devices set max_segment_size to indicate their scatter-gather capability, but have no interest in segment_boundary_mask. However, the existence of a dma_parms structure precludes the use of any default value, leaving them as zeros (assuming a properly kzalloc'ed structure). If a well-behaved IOMMU (or SWIOTLB) then tries to respect this by ensuring a mapped segment does not cross a zero-byte boundary, hilarity ensues. Since zero is a nonsensical value for either parameter, treat it as an indicator for "default", as might be expected. In the process, clean up a bit by replacing the bare constants with slightly more meaningful macros and removing the superfluous "else" statements. [akpm@linux-foundation.org: dma-mapping.h needs sizes.h for SZ_64K] Signed-off-by: Robin Murphy Reviewed-by: Sumit Semwal Acked-by: Marek Szyprowski Cc: Arnd Bergmann Cc: Sakari Ailus Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ac07ff090919..2e551e2d2d03 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -1,6 +1,7 @@ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H +#include #include #include #include @@ -145,7 +146,9 @@ static inline void arch_teardown_dma_ops(struct device *dev) { } static inline unsigned int dma_get_max_seg_size(struct device *dev) { - return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536; + if (dev->dma_parms && dev->dma_parms->max_segment_size) + return dev->dma_parms->max_segment_size; + return SZ_64K; } static inline unsigned int dma_set_max_seg_size(struct device *dev, @@ -154,14 +157,15 @@ static inline unsigned int dma_set_max_seg_size(struct device *dev, if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; return 0; - } else - return -EIO; + } + return -EIO; } static inline unsigned long dma_get_seg_boundary(struct device *dev) { - return dev->dma_parms ? - dev->dma_parms->segment_boundary_mask : 0xffffffff; + if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) + return dev->dma_parms->segment_boundary_mask; + return DMA_BIT_MASK(32); } static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) @@ -169,8 +173,8 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) if (dev->dma_parms) { dev->dma_parms->segment_boundary_mask = mask; return 0; - } else - return -EIO; + } + return -EIO; } #ifndef dma_max_pfn -- cgit v1.2.3 From cb7ae262e230064ba282094b7e1f60a092448b72 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Fri, 6 Nov 2015 16:33:01 -0800 Subject: include/linux/zutil.h: fix usage example of zlib_adler32() alder32 was renamed to zlib_adler32 since before 2.6.11. Signed-off-by: Anish Bhatt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zutil.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zutil.h b/include/linux/zutil.h index 6adfa9a6ffe9..663689521759 100644 --- a/include/linux/zutil.h +++ b/include/linux/zutil.h @@ -68,10 +68,10 @@ typedef uLong (*check_func) (uLong check, const Byte *buf, An Adler-32 checksum is almost as reliable as a CRC32 but can be computed much faster. Usage example: - uLong adler = adler32(0L, NULL, 0); + uLong adler = zlib_adler32(0L, NULL, 0); while (read_buffer(buffer, length) != EOF) { - adler = adler32(adler, buffer, length); + adler = zlib_adler32(adler, buffer, length); } if (adler != original_adler) error(); */ -- cgit v1.2.3 From 95ad1f4a9358dff1dcf84bf5c9cc84caa9215f7f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Nov 2015 11:21:47 +0100 Subject: netfilter: ipset: Fix extension alignment The data extensions in ipset lacked the proper memory alignment and thus could lead to kernel crash on several architectures. Therefore the structures have been reorganized and alignment attributes added where needed. The patch was tested on armv7h by Gerhard Wiesinger and on x86_64, sparc64 by Jozsef Kadlecsik. Reported-by: Gerhard Wiesinger Tested-by: Gerhard Wiesinger Tested-by: Jozsef Kadlecsik Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 48bb01edcf30..0e1f433cc4b7 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -421,7 +421,7 @@ extern void ip_set_free(void *members); extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], - size_t len); + size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); -- cgit v1.2.3 From dece16353ef47d8d33f5302bc158072a9d65e26f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:41:16 -0700 Subject: block: change ->make_request_fn() and users to return a queue cookie No functional changes in this patch, but it prepares us for returning a more useful cookie related to the IO that was queued up. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- include/linux/blk_types.h | 24 ++++++++++++++++++++++++ include/linux/blkdev.h | 4 ++-- include/linux/fs.h | 2 +- include/linux/lightnvm.h | 2 +- 4 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e8130138f29d..641e5a3ed58c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -244,4 +244,28 @@ enum rq_flag_bits { #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U +#define BLK_QC_T_SHIFT 16 + +static inline bool blk_qc_t_valid(blk_qc_t cookie) +{ + return cookie != BLK_QC_T_NONE; +} + +static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) +{ + return tag | (queue_num << BLK_QC_T_SHIFT); +} + +static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) +{ + return cookie >> BLK_QC_T_SHIFT; +} + +static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) +{ + return cookie & 0xffff; +} + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d045ca8487af..5ee0f5243025 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -209,7 +209,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef void (request_fn_proc) (struct request_queue *q); -typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); +typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); @@ -761,7 +761,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -extern void generic_make_request(struct bio *bio); +extern blk_qc_t generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 72d8a844c692..bcca36e4bc1e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2625,7 +2625,7 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern void submit_bio(int, struct bio *); +extern blk_qc_t submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5ebd70d12f35..69c9057e1ab8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -426,7 +426,7 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, return ppa; } -typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); +typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int); typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int); -- cgit v1.2.3 From 05229beeddf7e75e2e616ddaad4b70e7fca9528d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:44:55 -0700 Subject: block: add block polling support Add basic support for polling for specific IO to complete. This uses the cookie that blk-mq passes back, which enables the block layer to pass this cookie to the driver to spin for a specific request. This will be combined with request latency tracking, so we can make qualified decisions about when to poll and when not to. For now, for benchmark purposes, we add a sysfs file that controls whether polling is enabled or not. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- include/linux/blk-mq.h | 10 ++++++++++ include/linux/blkdev.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 83cc9d4e5455..daf17d70aeca 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -59,6 +59,9 @@ struct blk_mq_hw_ctx { struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; + + unsigned long poll_invoked; + unsigned long poll_success; }; struct blk_mq_tag_set { @@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef void (busy_tag_iter_fn)(struct request *, void *, bool); +typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); + struct blk_mq_ops { /* @@ -114,6 +119,11 @@ struct blk_mq_ops { */ timeout_fn *timeout; + /* + * Called to poll for completion of a specific tag. + */ + poll_fn *poll; + softirq_done_fn *complete; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5ee0f5243025..3fe27f8d91f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -487,6 +487,7 @@ struct request_queue { #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +bool blk_poll(struct request_queue *q, blk_qc_t cookie); + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; /* this is never NULL */ -- cgit v1.2.3 From 5037835c1f3eabf4f22163fc0278dd87165f8957 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 5 Oct 2015 16:33:36 -0600 Subject: coredump: add DAX filtering for ELF coredumps Add two new flags to the existing coredump mechanism for ELF files to allow us to explicitly filter DAX mappings. This is desirable because DAX mappings, like hugetlb mappings, have the potential to be very large. Update the coredump_filter documentation in Documentation/filesystems/proc.txt so that it addresses the new DAX coredump flags. Also update the documented default value of coredump_filter to be consistent with the core(5) man page. The documentation being updated talks about bit 4, Dump ELF headers, which is enabled if CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is turned on in the kernel config. This kernel config option defaults to "y" if both ELF binaries and coredump are enabled. Signed-off-by: Ross Zwisler Acked-by: Jeff Moyer Signed-off-by: Dan Williams --- include/linux/sched.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..3c02d92ed23b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -483,9 +483,11 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_HUGETLB_PRIVATE 7 #define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 7 +#define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ -- cgit v1.2.3 From c8299cb605b27dd5a49f7a69e48fd23e5a206298 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 9 Nov 2015 14:58:10 -0800 Subject: kernel.h: make abs() work with 64-bit types For 64-bit arguments, the abs macro casts it to an int which leads to lost precision and may cause incorrect results. To deal with 64-bit types abs64 macro has been introduced but still there are places where abs macro is used incorrectly. To deal with the problem, expand abs macro such that it operates on s64 type when dealing with 64-bit types while still returning long when dealing with smaller types. This fixes one known bug (per John): The internal clocksteering done for fine-grained error correction uses a : logarithmic approximation, so any time adjtimex() adjusts the clock : steering, timekeeping_freqadjust() quickly approximates the correct clock : frequency over a series of ticks. : : Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit : dc491596f639438 (Rework frequency adjustments to work better w/ nohz), : used the abs() function with a s64 error value to calculate the size of : the approximated adjustment to be made. : : Per include/linux/kernel.h: "abs() should not be used for 64-bit types : (s64, u64, long long) - use abs64()". : : Thus on 32-bit platforms, this resulted in the clocksteering to take a : quite dampended random walk trying to converge on the proper frequency, : which caused the adjustments to be made much slower then intended (most : easily observed when large adjustments are made). Signed-off-by: Michal Nazarewicz Reported-by: John Stultz Tested-by: John Stultz Cc: Ingo Molnar Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2c13f747ac2e..05ce782d53ab 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -200,28 +200,31 @@ extern int _cond_resched(void); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -/* - * abs() handles unsigned and signed longs, ints, shorts and chars. For all - * input types abs() returns a signed long. - * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64() - * for those. +/** + * abs - return absolute value of an argument + * @x: the value. If it is unsigned type, it is converted to signed type first + * (s64, long or int depending on its size). + * + * Return: an absolute value of x. If x is 64-bit, macro's return type is s64, + * otherwise it is signed long. */ -#define abs(x) ({ \ - long ret; \ - if (sizeof(x) == sizeof(long)) { \ - long __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } else { \ - int __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } \ - ret; \ - }) - -#define abs64(x) ({ \ - s64 __x = (x); \ - (__x < 0) ? -__x : __x; \ - }) +#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({ \ + s64 __x = (x); \ + (__x < 0) ? -__x : __x; \ + }), ({ \ + long ret; \ + if (sizeof(x) == sizeof(long)) { \ + long __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } else { \ + int __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } \ + ret; \ + })) + +/* Deprecated, use abs instead. */ +#define abs64(x) abs((s64)(x)) /** * reciprocal_scale - "scale" a value into range [0, ep_ro) -- cgit v1.2.3 From 79211c8ed19c055ca105502c8733800d442a0ae6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 9 Nov 2015 14:58:13 -0800 Subject: remove abs64() Switch everything to the new and more capable implementation of abs(). Mainly to give the new abs() a bit of a workout. Cc: Michal Nazarewicz Cc: John Stultz Cc: Ingo Molnar Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 05ce782d53ab..350dfb08aee3 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -223,9 +223,6 @@ extern int _cond_resched(void); ret; \ })) -/* Deprecated, use abs instead. */ -#define abs64(x) abs((s64)(x)) - /** * reciprocal_scale - "scale" a value into range [0, ep_ro) * @val: value -- cgit v1.2.3 From 77c5b5da02f0a30d61144a546c4ef3657e3b817d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 9 Nov 2015 14:58:23 -0800 Subject: kmap_atomic_to_page() has no users, remove it Removal started in commit 5bbeed12bdc3 ("sparc32: drop unused kmap_atomic_to_page"). Let's do it across the whole tree. Signed-off-by: Nicolas Pitre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 6aefcd0031a6..bb3f3297062a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -78,7 +78,6 @@ static inline void __kunmap_atomic(void *addr) } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) -#define kmap_atomic_to_page(ptr) virt_to_page(ptr) #define kmap_flush_unused() do {} while(0) #endif -- cgit v1.2.3 From 7bc4f1d281bc1f807fd0c9aaa2f2d333b6508790 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 9 Nov 2015 14:58:26 -0800 Subject: include/linux/kdev_t.h: remove unused huge_valid_dev() There's no user of huge_valid_dev() any more, so remove it. No functional change. Signed-off-by: Yaowei Bai Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index c838abe3ee0a..a546d206c7f3 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -54,11 +54,6 @@ static inline dev_t new_decode_dev(u32 dev) return MKDEV(major, minor); } -static inline int huge_valid_dev(dev_t dev) -{ - return 1; -} - static inline u64 huge_encode_dev(dev_t dev) { return new_encode_dev(dev); -- cgit v1.2.3 From 8b9758b9c6f65f55c94370636c04e976edc93e1a Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 9 Nov 2015 14:58:28 -0800 Subject: include/linux/kdev_t.h: old/new_valid_dev() can return bool Make old/new_valid_dev return bool due to these two particular functions only using either one or zero as their return value. No functional change. Signed-off-by: Yaowei Bai Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index a546d206c7f3..052c7b32cc91 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -20,7 +20,7 @@ }) /* acceptable for old filesystems */ -static inline int old_valid_dev(dev_t dev) +static inline bool old_valid_dev(dev_t dev) { return MAJOR(dev) < 256 && MINOR(dev) < 256; } @@ -35,7 +35,7 @@ static inline dev_t old_decode_dev(u16 val) return MKDEV((val >> 8) & 255, val & 255); } -static inline int new_valid_dev(dev_t dev) +static inline bool new_valid_dev(dev_t dev) { return 1; } -- cgit v1.2.3 From 35181e86df97e4223f4a28fb33e2bcf3b73de141 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 20 Oct 2015 15:39:03 +0800 Subject: KVM: x86: Add a common TSC scaling function VMX and SVM calculate the TSC scaling ratio in a similar logic, so this patch generalizes it to a common TSC scaling function. Signed-off-by: Haozhong Zhang [Inline the multiplication and shift steps into mul_u64_u64_shr. Remove BUG_ON. - Paolo] Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/linux/math64.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 242a6d2b53ff..5706a2108f0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ + #endif diff --git a/include/linux/math64.h b/include/linux/math64.h index c45c089bfdac..44282ec7b682 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * mul) >> shift); +} +#endif /* mul_u64_u64_shr */ + #else #ifndef mul_u64_u32_shr @@ -161,6 +168,50 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } rl, rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rl.ll = (u64)a0.l.low * b0.l.low; + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + /* + * Each of these lines computes a 64-bit intermediate result into "c", + * starting at bits 32-95. The low 32-bits go into the result of the + * multiplication, the high 32-bits are carried into the next step. + */ + rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low; + rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + /* + * The 128-bit result of the multiplication is in rl.ll and rh.ll, + * shift it right and throw away the high part of the result. + */ + if (shift == 0) + return rl.ll; + if (shift < 64) + return (rl.ll >> shift) | (rh.ll << (64 - shift)); + return rh.ll >> (shift & 63); +} +#endif /* mul_u64_u64_shr */ + #endif #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From 381d585c80e34988269bd7901ad910981e900be1 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 20 Oct 2015 15:39:04 +0800 Subject: KVM: x86: Replace call-back set_tsc_khz() with a common function Both VMX and SVM propagate virtual_tsc_khz in the same way, so this patch removes the call-back set_tsc_khz() and replaces it with a common function. Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini --- include/linux/math64.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index 44282ec7b682..6e8b5b270ffe 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -214,4 +214,33 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) #endif +#ifndef mul_u64_u32_div +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } u, rl, rh; + + u.ll = a; + rl.ll = (u64)u.l.low * mul; + rh.ll = (u64)u.l.high * mul + rl.l.high; + + /* Bits 32-63 of the result will be in rh.l.low. */ + rl.l.high = do_div(rh.ll, divisor); + + /* Bits 0-31 of the result will be in rl.l.low. */ + do_div(rl.ll, divisor); + + rl.l.high = rh.l.low; + return rl.ll; +} +#endif /* mul_u64_u32_div */ + #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From f70cd6b07e629f367bb9b1ac9d0e3e669eb325c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:55 +0100 Subject: context_tracking: remove duplicate enabled check All calls to context_tracking_enter and context_tracking_exit are already checking context_tracking_is_enabled, except the context_tracking_user_enter and context_tracking_user_exit functions left in for the benefit of assembly calls. Pull the check up to those functions, by making them simple wrappers around the user_enter and user_exit inline functions. Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Acked-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 008fc67d0d96..6ef136ff0897 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -18,13 +18,13 @@ extern void context_tracking_user_exit(void); static inline void user_enter(void) { if (context_tracking_is_enabled()) - context_tracking_user_enter(); + context_tracking_enter(CONTEXT_USER); } static inline void user_exit(void) { if (context_tracking_is_enabled()) - context_tracking_user_exit(); + context_tracking_exit(CONTEXT_USER); } static inline enum ctx_state exception_enter(void) -- cgit v1.2.3 From d0e536d89395ecd8ab78fe999dc4d6f5d140ce46 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:56 +0100 Subject: context_tracking: avoid irq_save/irq_restore on guest entry and exit guest_enter and guest_exit must be called with interrupts disabled, since they take the vtime_seqlock with write_seq{lock,unlock}. Therefore, it is not necessary to check for exceptions, nor to save/restore the IRQ state, when context tracking functions are called by guest_enter and guest_exit. Split the body of context_tracking_entry and context_tracking_exit out to __-prefixed functions, and use them from KVM. Rik van Riel has measured this to speed up a tight vmentry/vmexit loop by about 2%. Cc: Andy Lutomirski Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6ef136ff0897..68b575afe5f5 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,10 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +/* Called with interrupts disabled. */ +extern void __context_tracking_enter(enum ctx_state state); +extern void __context_tracking_exit(enum ctx_state state); + extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); @@ -88,13 +92,13 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; if (context_tracking_is_enabled()) - context_tracking_enter(CONTEXT_GUEST); + __context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { if (context_tracking_is_enabled()) - context_tracking_exit(CONTEXT_GUEST); + __context_tracking_exit(CONTEXT_GUEST); if (vtime_accounting_enabled()) vtime_guest_exit(current); -- cgit v1.2.3 From d1cd21427747f15920cd726f5f67a07880e7dee4 Mon Sep 17 00:00:00 2001 From: Jonathan Richardson Date: Fri, 16 Oct 2015 17:40:58 -0700 Subject: pwm: Set enable state properly on failed call to enable The pwm_enable() function didn't clear the enabled bit if a call to the driver's ->enable() callback returned an error. The result was that the state of the PWM core was wrong. Clearing the bit when enable returns an error ensures the state is properly set. Tested-by: Jonathan Richardson Reviewed-by: Dmitry Torokhov Signed-off-by: Jonathan Richardson [thierry.reding@gmail.com: add missing kerneldoc for the lock] Signed-off-by: Thierry Reding --- include/linux/pwm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d681f6875aef..cfc3ed46cad2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -2,6 +2,7 @@ #define __LINUX_PWM_H #include +#include #include struct pwm_device; @@ -87,6 +88,7 @@ enum { * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device + * @lock: used to serialize accesses to the PWM device where necessary * @period: period of the PWM signal (in nanoseconds) * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) * @polarity: polarity of the PWM signal @@ -98,6 +100,7 @@ struct pwm_device { unsigned int pwm; struct pwm_chip *chip; void *chip_data; + struct mutex lock; unsigned int period; unsigned int duty_cycle; -- cgit v1.2.3 From aabc92bbe3cfe4c545f8ccdaaeeea012a46f0abf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 10 Nov 2015 14:31:18 +0100 Subject: net: add __netdev_alloc_pcpu_stats() to indicate gfp flags nf_tables may create percpu counters from the packet path through its dynamic set instantiation infrastructure, so we need a way to allocate this through GFP_ATOMIC. Signed-off-by: Pablo Neira Ayuso Acked-by: David S. Miller --- include/linux/netdevice.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c00772bd136..e9d0c8a75380 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2068,20 +2068,23 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; -#define netdev_alloc_pcpu_stats(type) \ -({ \ - typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ - if (pcpu_stats) { \ - int __cpu; \ - for_each_possible_cpu(__cpu) { \ - typeof(type) *stat; \ - stat = per_cpu_ptr(pcpu_stats, __cpu); \ - u64_stats_init(&stat->syncp); \ - } \ - } \ - pcpu_stats; \ +#define __netdev_alloc_pcpu_stats(type, gfp) \ +({ \ + typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ }) +#define netdev_alloc_pcpu_stats(type) \ + __netdev_alloc_pcpu_stats(type, GFP_KERNEL); + #include /* netdevice notifier chain. Please remember to update the rtnetlink -- cgit v1.2.3 From b1d06b60e90cd5016798b9984f8e420e753f4846 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 6 Nov 2015 19:28:22 -0800 Subject: of: Provide static inline function for of_translate_address if needed If OF_ADDRESS is not configured, builds can fail with errors such as drivers/net/ethernet/hisilicon/hns_mdio.c: In function 'hns_mdio_bus_name': drivers/net/ethernet/hisilicon/hns_mdio.c:411:3: error: implicit declaration of function 'of_translate_address' as currently seen when building sparc:allmodconfig. Introduce a static inline function if OF_ADDRESS is not configured to fix the build failure. Return OF_BAD_ADDR in this case. For this to work, the definition of OF_BAD_ADDR has to be moved outside CONFIG_OF conditional code. Fixes: 876133d3161d ("net: hisilicon: add OF dependency") Cc: Arnd Bergmann Signed-off-by: Guenter Roeck Reviewed-by: Arnd Bergmann Reviewed-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of.h | 4 ++-- include/linux/of_address.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 2194b8ca41f9..dd10626a615f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -126,6 +126,8 @@ extern raw_spinlock_t devtree_lock; #define OF_POPULATED 3 /* device already created for the node */ #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ +#define OF_BAD_ADDR ((u64)-1) + #ifdef CONFIG_OF void of_core_init(void); @@ -229,8 +231,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) -#define OF_BAD_ADDR ((u64)-1) - static inline const char *of_node_full_name(const struct device_node *np) { return np ? np->full_name : ""; diff --git a/include/linux/of_address.h b/include/linux/of_address.h index d88e81be6368..507daad0bc8d 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -57,6 +57,13 @@ extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size); extern bool of_dma_is_coherent(struct device_node *np); #else /* CONFIG_OF_ADDRESS */ + +static inline u64 of_translate_address(struct device_node *np, + const __be32 *addr) +{ + return OF_BAD_ADDR; +} + static inline struct device_node *of_find_matching_node_by_address( struct device_node *from, const struct of_device_id *matches, -- cgit v1.2.3 From 5c50002963369c7c622b18ff751719eadbe225c5 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 13 Oct 2015 16:51:02 -0600 Subject: vfs: remove unused wrapper block_page_mkwrite() The function currently called "__block_page_mkwrite()" used to be called "block_page_mkwrite()" until a wrapper for this function was added by: commit 24da4fab5a61 ("vfs: Create __block_page_mkwrite() helper passing error values back") This wrapper, the current "block_page_mkwrite()", is currently unused. __block_page_mkwrite() is used directly by ext4, nilfs2 and xfs. Remove the unused wrapper, rename __block_page_mkwrite() back to block_page_mkwrite() and update the comment above block_page_mkwrite(). Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Jan Kara Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Al Viro --- include/linux/buffer_head.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e6797ded700e..89d9aa9e79bf 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -227,8 +227,6 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); /* Convert errno to return value from ->page_mkwrite() call */ -- cgit v1.2.3 From c8fffa643583e00eb9a783abbca251b11bc0d163 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 8 Oct 2015 17:07:20 -0600 Subject: vfs: remove stale comment in inode_operations The big warning comment that is currently at the end of struct inode_operations was added as part of this commit: 4aa7c6346be3 ("vfs: add i_op->dentry_open()") It was added to warn people not to use the newly added 'dentry_open' function pointer. This function pointer was removed as part of this commit: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") The comment was left behind and now refers to nothing, so remove it. Signed-off-by: Ross Zwisler Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a1cb8c605e0..f3bfbd7d3fa9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1665,8 +1665,6 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); - - /* WARNING: probably going away soon, do not use! */ } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, -- cgit v1.2.3 From c0a9f72c156baf1e88c33c6ba4450647af1b8804 Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Mon, 12 Oct 2015 10:40:43 +0100 Subject: irqchip: irq-mips-gic: Provide function to map GIC user section The GIC provides a "user-mode visible" section containing a mirror of the counter registers which can be mapped into user memory. This will be used by the VDSO time function implementations, so provide a function to map it in. When the GIC is not enabled in Kconfig a dummy inline version of this function is provided, along with "#define gic_present 0", so that we don't have to litter the VDSO code with ifdefs. [markos.chandras@imgtec.com: - Move mapping code to arch/mips/kernel/vdso.c and use a resource type to get the GIC usermode information - Avoid renaming function arguments and use __gic_base_addr to hold the base GIC address prior to ioremap.] [ralf@linux-mips.org: Fix up gic_get_usm_range() to compile and make inline again.] Signed-off-by: Alex Smith Signed-off-by: Markos Chandras Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Alex Smith Cc: Markos Chandras Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/11281/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 4e6861605050..ce824db48d64 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -9,6 +9,7 @@ #define __LINUX_IRQCHIP_MIPS_GIC_H #include +#include #define GIC_MAX_INTRS 256 @@ -245,6 +246,8 @@ #define GIC_SHARED_TO_HWIRQ(x) (GIC_SHARED_HWIRQ_BASE + (x)) #define GIC_HWIRQ_TO_SHARED(x) ((x) - GIC_SHARED_HWIRQ_BASE) +#ifdef CONFIG_MIPS_GIC + extern unsigned int gic_present; extern void gic_init(unsigned long gic_base_addr, @@ -264,4 +267,18 @@ extern unsigned int plat_ipi_resched_int_xlate(unsigned int); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); +extern int gic_get_usm_range(struct resource *gic_usm_res); + +#else /* CONFIG_MIPS_GIC */ + +#define gic_present 0 + +static inline int gic_get_usm_range(struct resource *gic_usm_res) +{ + /* Shouldn't be called. */ + return -1; +} + +#endif /* CONFIG_MIPS_GIC */ + #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit v1.2.3 From e3a7a3bf362e2a8acc301e5eaec2631e740a8a95 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Nov 2015 09:37:34 -0700 Subject: block: don't hardcode blk_qc_t -> tag mask Use the shift/mask we use elsewhere. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 641e5a3ed58c..0fb65843ec1e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -265,7 +265,7 @@ static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) { - return cookie & 0xffff; + return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } #endif /* __LINUX_BLK_TYPES_H */ -- cgit v1.2.3 From e409de992e3ea3674393465f07cc71c948edd87a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Oct 2015 19:18:52 +0200 Subject: 9p: xattr simplifications Now that the xattr handler is passed to the xattr handler operations, we can use the same get and set operations for the user, trusted, and security xattr namespaces. In those namespaces, we can access the full attribute name by "reattaching" the name prefix the vfs has skipped for us. Add a xattr_full_name helper to make this obvious in the code. For the "system.posix_acl_access" and "system.posix_acl_default" attributes, handler->prefix is the full attribute name; the suffix is the empty string. Signed-off-by: Andreas Gruenbacher Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: v9fs-developer@lists.sourceforge.net Signed-off-by: Al Viro --- include/linux/xattr.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 91b0a68d38dc..89474b9d260c 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -21,15 +21,19 @@ struct dentry; struct xattr_handler { const char *prefix; - int flags; /* fs private flags passed back to the handlers */ - size_t (*list)(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags); - int (*get)(struct dentry *dentry, const char *name, void *buffer, - size_t size, int handler_flags); - int (*set)(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags); + int flags; /* fs private flags */ + size_t (*list)(const struct xattr_handler *, struct dentry *dentry, + char *list, size_t list_size, const char *name, + size_t name_len); + int (*get)(const struct xattr_handler *, struct dentry *dentry, + const char *name, void *buffer, size_t size); + int (*set)(const struct xattr_handler *, struct dentry *dentry, + const char *name, const void *buffer, size_t size, + int flags); }; +const char *xattr_full_name(const struct xattr_handler *, const char *); + struct xattr { const char *name; void *value; -- cgit v1.2.3 From 66189961e986e53ae39822898fc2ce88f44c61bb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 12 Nov 2015 19:35:26 +0200 Subject: net/mlx5e: Added self loopback prevention Prevent outgoing multicast frames from looping back to the RX queue. By introducing new HW capability self_lb_en_modifiable, which indicates the support to modify self_lb_en bit in modify_tir command. When this capability is set we can prevent TIRs from sending back loopback multicast traffic to their own RQs, by "refreshing TIRs" with modify_tir command, on every time new channels (SQs/RQs) are created at device open. This is needed since TIRs are static and only allocated once on driver load, and the loopback decision is under their responsibility. Fixes issues of the kind: "IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!" The issue is seen since the IPv6 solicitations multicast messages are loopedback and the network stack thinks they are coming from another host. Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd2097455a2e..1565324eb620 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_0[0x6]; + u8 reserved_0[0x3]; + u8 self_lb_en_modifiable[0x1]; + u8 reserved_1[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_1[0x4]; + u8 reserved_2[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_2[0x3]; + u8 reserved_3[0x3]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_3[0x2]; + u8 reserved_4[0x2]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_4[0x20]; + u8 reserved_5[0x20]; - u8 reserved_5[0x10]; + u8 reserved_6[0x10]; u8 lro_min_mss_size[0x10]; - u8 reserved_6[0x120]; + u8 reserved_7[0x120]; u8 lro_timer_supported_periods[4][0x20]; - u8 reserved_7[0x600]; + u8 reserved_8[0x600]; }; struct mlx5_ifc_roce_cap_bits { @@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits { }; struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_1[0x1b]; + u8 self_lb_en[0x1]; + u8 reserved_2[0x3]; u8 lro[0x1]; }; -- cgit v1.2.3 From 500404ebcbd074ca11aa0c3fd9a268aa4054fd8b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 3 Nov 2015 12:28:10 +0200 Subject: dmaengine: of_dma: Correct return code for of_dma_request_slave_channel in case !CONFIG_OF of_dma_request_slave_channel should return either pointer for valid dma_chan or ERR_PTR() error code, NULL is not expected to be returned. Signed-off-by: Peter Ujfalusi Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 36112cdd665a..b90d8ec57c1f 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -80,7 +80,7 @@ static inline int of_dma_router_register(struct device_node *np, static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, -- cgit v1.2.3 From aedf17f4515b12ba1cd73298e66baa69cf93010e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:36 +0100 Subject: lightnvm: change max_phys_sect to uint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The max_phys_sect variable is defined as a char. We do a boundary check to maximally allow 256 physical page descriptors per command. As we are not indexing from zero. This expression is always false. Bump the max_phys_sect to an unsigned int to support the range check. Signed-off-by: Matias Bjørling Reported-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 69c9057e1ab8..32b5369e814e 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -220,7 +220,7 @@ struct nvm_dev_ops { nvm_dev_dma_alloc_fn *dev_dma_alloc; nvm_dev_dma_free_fn *dev_dma_free; - uint8_t max_phys_sect; + unsigned int max_phys_sect; }; struct nvm_lun { -- cgit v1.2.3 From 11450469830f2481a9e7cb181609288d40f41323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:37 +0100 Subject: lightnvm: update bad block table format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specification was changed to reflect a multi-value bad block table. Instead of bit-based bad block table, the bad block table now allows eight bad block categories. Currently four are defined: * Factory bad blocks * Grown bad blocks * Device-side reserved blocks * Host-side reserved blocks The factory and grown bad blocks are the regular bad blocks. The reserved blocks are either for internal use or external use. In particular, the device-side reserved blocks allows the host to bootstrap from a limited number of flash blocks. Reducing the flash blocks to scan upon super block initialization. Support for both get bad block table and set bad block table is added. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 32b5369e814e..9b3dc1bc9296 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -191,11 +191,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) struct nvm_block; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); -typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *); +typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int, +typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); @@ -210,7 +210,7 @@ struct nvm_dev_ops { nvm_id_fn *identity; nvm_get_l2p_tbl_fn *get_l2p_tbl; nvm_op_bb_tbl_fn *get_bb_tbl; - nvm_op_set_bb_fn *set_bb; + nvm_op_set_bb_fn *set_bb_tbl; nvm_submit_io_fn *submit_io; nvm_erase_blk_fn *erase_block; -- cgit v1.2.3 From 12be5edf68e785dd5dc8665db5a88152b49c1fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:39 +0100 Subject: lightnvm: expose mccap in identify command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mccap field is required for I/O command option support. It defines the following flash access modes: * SLC mode * Erase/Program Suspension * Scramble On/Off * Encryption It is slotted in between mpos and cpar, changing the offset for cpar as well. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 9b3dc1bc9296..2572856e2a89 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -74,6 +74,7 @@ struct nvm_id_group { u32 tbet; u32 tbem; u32 mpos; + u32 mccap; u16 cpar; u8 res[913]; } __packed; -- cgit v1.2.3 From 73387e7bed260c89628fc6a4e3632b45be9776b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:40 +0100 Subject: lightnvm: remove unused attrs in nvm_id structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvm_id, nvm_id_group and nvm_addr_format data structures contain reserved attributes. They are unused by media managers and targets. Remove them. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2572856e2a89..e6ef8aaf533f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -58,7 +58,6 @@ enum { struct nvm_id_group { u8 mtype; u8 fmtype; - u16 res16; u8 num_ch; u8 num_lun; u8 num_pln; @@ -76,8 +75,7 @@ struct nvm_id_group { u32 mpos; u32 mccap; u16 cpar; - u8 res[913]; -} __packed; +}; struct nvm_addr_format { u8 ch_offset; @@ -92,19 +90,16 @@ struct nvm_addr_format { u8 pg_len; u8 sect_offset; u8 sect_len; - u8 res[4]; }; struct nvm_id { u8 ver_id; u8 vmnt; u8 cgrps; - u8 res[5]; u32 cap; u32 dom; struct nvm_addr_format ppaf; u8 ppat; - u8 resv[224]; struct nvm_id_group groups[4]; } __packed; -- cgit v1.2.3 From 7386af270c72be65c7cb2ba4ad0d4e70dc373106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:44 +0100 Subject: lightnvm: remove linear and device addr modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The linear and device specific address modes can be replaced with a simple offset and bit length conversion that is generic across all devices. This both simplifies the specification and removes the special case for qemu nvme, that previously relied on the linear address mapping. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 154 +++++++++++------------------------------------ 1 file changed, 34 insertions(+), 120 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e6ef8aaf533f..cbe288acb1de 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -99,7 +99,6 @@ struct nvm_id { u32 cap; u32 dom; struct nvm_addr_format ppaf; - u8 ppat; struct nvm_id_group groups[4]; } __packed; @@ -119,39 +118,28 @@ struct nvm_tgt_instance { #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCH 0 -#define NVM_SEC_BITS (8) -#define NVM_PL_BITS (6) -#define NVM_PG_BITS (16) #define NVM_BLK_BITS (16) -#define NVM_LUN_BITS (10) +#define NVM_PG_BITS (16) +#define NVM_SEC_BITS (8) +#define NVM_PL_BITS (8) +#define NVM_LUN_BITS (8) #define NVM_CH_BITS (8) struct ppa_addr { + /* Generic structure for all addresses */ union { - /* Channel-based PPA format in nand 4x2x2x2x8x10 */ - struct { - u64 ch : 4; - u64 sec : 2; /* 4 sectors per page */ - u64 pl : 2; /* 4 planes per LUN */ - u64 lun : 2; /* 4 LUNs per channel */ - u64 pg : 8; /* 256 pages per block */ - u64 blk : 10;/* 1024 blocks per plane */ - u64 resved : 36; - } chnl; - - /* Generic structure for all addresses */ struct { + u64 blk : NVM_BLK_BITS; + u64 pg : NVM_PG_BITS; u64 sec : NVM_SEC_BITS; u64 pl : NVM_PL_BITS; - u64 pg : NVM_PG_BITS; - u64 blk : NVM_BLK_BITS; u64 lun : NVM_LUN_BITS; u64 ch : NVM_CH_BITS; } g; u64 ppa; }; -} __packed; +}; struct nvm_rq { struct nvm_tgt_instance *ins; @@ -259,8 +247,7 @@ struct nvm_dev { int blks_per_lun; int sec_size; int oob_size; - int addr_mode; - struct nvm_addr_format addr_format; + struct nvm_addr_format ppaf; /* Calculated/Cached values. These do not reflect the actual usable * blocks at run-time. @@ -286,118 +273,45 @@ struct nvm_dev { char name[DISK_NAME_LEN]; }; -/* fallback conversion */ -static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev, - struct ppa_addr r) -{ - struct ppa_addr l; - - l.ppa = r.g.sec + - r.g.pg * dev->sec_per_pg + - r.g.blk * (dev->pgs_per_blk * - dev->sec_per_pg) + - r.g.lun * (dev->blks_per_lun * - dev->pgs_per_blk * - dev->sec_per_pg) + - r.g.ch * (dev->blks_per_lun * - dev->pgs_per_blk * - dev->luns_per_chnl * - dev->sec_per_pg); - - return l; -} - -/* fallback conversion */ -static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev, - struct ppa_addr r) -{ - struct ppa_addr l; - int secs, pgs, blks, luns; - sector_t ppa = r.ppa; - - l.ppa = 0; - - div_u64_rem(ppa, dev->sec_per_pg, &secs); - l.g.sec = secs; - - sector_div(ppa, dev->sec_per_pg); - div_u64_rem(ppa, dev->sec_per_blk, &pgs); - l.g.pg = pgs; - - sector_div(ppa, dev->pgs_per_blk); - div_u64_rem(ppa, dev->blks_per_lun, &blks); - l.g.blk = blks; - - sector_div(ppa, dev->blks_per_lun); - div_u64_rem(ppa, dev->luns_per_chnl, &luns); - l.g.lun = luns; - - sector_div(ppa, dev->luns_per_chnl); - l.g.ch = ppa; - - return l; -} - -static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r) +static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, + struct ppa_addr r) { struct ppa_addr l; - l.ppa = 0; - - l.chnl.sec = r.g.sec; - l.chnl.pl = r.g.pl; - l.chnl.pg = r.g.pg; - l.chnl.blk = r.g.blk; - l.chnl.lun = r.g.lun; - l.chnl.ch = r.g.ch; + l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset; + l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset; + l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset; + l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset; + l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset; + l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset; return l; } -static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r) +static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, + struct ppa_addr r) { struct ppa_addr l; - l.ppa = 0; - - l.g.sec = r.chnl.sec; - l.g.pl = r.chnl.pl; - l.g.pg = r.chnl.pg; - l.g.blk = r.chnl.blk; - l.g.lun = r.chnl.lun; - l.g.ch = r.chnl.ch; + /* + * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc. + */ + l.g.blk = (r.ppa >> dev->ppaf.blk_offset) & + (((1 << dev->ppaf.blk_len) - 1)); + l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) & + (((1 << dev->ppaf.pg_len) - 1)); + l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) & + (((1 << dev->ppaf.sect_len) - 1)); + l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) & + (((1 << dev->ppaf.pln_len) - 1)); + l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) & + (((1 << dev->ppaf.lun_len) - 1)); + l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) & + (((1 << dev->ppaf.ch_len) - 1)); return l; } -static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev, - struct ppa_addr gppa) -{ - switch (dev->addr_mode) { - case NVM_ADDRMODE_LINEAR: - return __linear_to_generic_addr(dev, gppa); - case NVM_ADDRMODE_CHANNEL: - return __chnl_to_generic_addr(gppa); - default: - BUG(); - } - return gppa; -} - -static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev, - struct ppa_addr gppa) -{ - switch (dev->addr_mode) { - case NVM_ADDRMODE_LINEAR: - return __generic_to_linear_addr(dev, gppa); - case NVM_ADDRMODE_CHANNEL: - return __generic_to_chnl_addr(gppa); - default: - BUG(); - } - return gppa; -} - static inline int ppa_empty(struct ppa_addr ppa_addr) { return (ppa_addr.ppa == ADDR_EMPTY); -- cgit v1.2.3 From 28f9ee22bcdd84726dbf6267d0b58f254166b900 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 16 Nov 2015 15:43:45 -0500 Subject: vlan: Do not put vlan headers back on bridge and macvlan ports When a vlan is configured with REORDER_HEADER set to 0, the vlan header is put back into the packet and makes it appear that the vlan header is still there even after it's been processed. This posses a problem for bridge and macvlan ports. The packets passed to those device may be forwarded and at the time of the forward, vlan headers end up being unexpectedly present. With the patch, we make sure that we do not put the vlan header back (when REORDER_HEADER is 0) if a bridge or macvlan has been configured on top of the vlan device. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc221b967687..67bfac1abfc1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3857,6 +3857,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev) return dev->priv_flags & IFF_EBRIDGE; } +static inline bool netif_is_bridge_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_BRIDGE_PORT; +} + static inline bool netif_is_ovs_master(const struct net_device *dev) { return dev->priv_flags & IFF_OPENVSWITCH; -- cgit v1.2.3 From 819ec8e1f349f73bdf65bf33a364538e59007a9a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 16 Nov 2015 23:34:41 +0100 Subject: phy: marvell: Add support for 88E1540 PHY The 88E1540 can be found embedded in the Marvell 88E6352 switch. It is compatible with the 88E1510, so add support for it, using the 88E1510 specific functions. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index e6982ac3200d..a57f0dfb6db7 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -16,6 +16,7 @@ #define MARVELL_PHY_ID_88E1318S 0x01410e90 #define MARVELL_PHY_ID_88E1116R 0x01410e40 #define MARVELL_PHY_ID_88E1510 0x01410dd0 +#define MARVELL_PHY_ID_88E1540 0x01410eb0 #define MARVELL_PHY_ID_88E3016 0x01410e60 /* struct phy_device dev_flags definitions */ -- cgit v1.2.3 From db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:03:50 +0100 Subject: KVM: Provide function for VCPU lookup by id Let's provide a function to lookup a VCPU by id. Reviewed-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [split patch from refactoring patch] --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5706a2108f0a..c923350ca20a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -460,6 +460,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ idx++) +static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + if (vcpu->vcpu_id == id) + return vcpu; + return NULL; +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ -- cgit v1.2.3 From 851df3dc11136fde86ebd78ee7527cb43c7cd349 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 22:34:58 +0100 Subject: scpi: hide get_scpi_ops in module from built-in code The scpi_clock driver can be built-in when CONFIG_COMPILE_TEST is set even when ARM_SCPI_PROTOCOL is a loadable module, and that results in a link error: drivers/built-in.o: In function `scpi_clocks_probe': (.text+0x14453c): undefined reference to `get_scpi_ops' Using #if IS_REACHABLE() around the get_scpi_ops() declaration makes it build successfully in this case for compile-testing, but the effect is the same as when ARM_SCPI_PROTOCOL is disabled, as the code will not be used. Signed-off-by: Arnd Bergmann Acked-by: Punit Agrawal --- include/linux/scpi_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index 80af3cd35ae4..72ce932c69b2 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -71,7 +71,7 @@ struct scpi_ops { int (*sensor_get_value)(u16, u32 *); }; -#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) +#if IS_REACHABLE(CONFIG_ARM_SCPI_PROTOCOL) struct scpi_ops *get_scpi_ops(void); #else static inline struct scpi_ops *get_scpi_ops(void) { return NULL; } -- cgit v1.2.3 From 2e6edc95382cc36423aff18a237173ad62d5ab52 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2015 13:29:28 -0800 Subject: block: protect rw_page against device teardown Fix use after free crashes like the following: general protection fault: 0000 [#1] SMP Call Trace: [] ? pmem_do_bvec.isra.12+0xa6/0xf0 [nd_pmem] [] pmem_rw_page+0x42/0x80 [nd_pmem] [] bdev_read_page+0x50/0x60 [] do_mpage_readpage+0x510/0x770 [] ? I_BDEV+0x20/0x20 [] ? lru_cache_add+0x1c/0x50 [] mpage_readpages+0x107/0x170 [] ? I_BDEV+0x20/0x20 [] ? I_BDEV+0x20/0x20 [] blkdev_readpages+0x1d/0x20 [] __do_page_cache_readahead+0x28f/0x310 [] ? __do_page_cache_readahead+0x169/0x310 [] ? pagecache_get_page+0x2d/0x1d0 [] filemap_fault+0x396/0x530 [] __do_fault+0x4e/0xf0 [] handle_mm_fault+0x11bd/0x1b50 Cc: Cc: Jens Axboe Cc: Alexander Viro Reported-by: kbuild test robot Acked-by: Matthew Wilcox [willy: symmetry fixups] Signed-off-by: Dan Williams --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fe27f8d91f0..c0d2b7927c1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -794,6 +794,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); +extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3 From b466c1dd73d5303a313fb0c962e4eb5879bc1336 Mon Sep 17 00:00:00 2001 From: Simon Wood Date: Thu, 19 Nov 2015 16:42:14 -0700 Subject: HID: Add vendor specific usage pages for Logitech G920 The Logitech G920 uses a couple of vendor specific usage pages, which results in incorrect number of axis/buttons being detected. This patch adds these pages to the 'ignore' list. Reported-by: Elias Vanderstuyft Signed-off-by: Simon Wood Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 251a1d382e23..a6d7a3fc2cb3 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -168,6 +168,8 @@ struct hid_item { #define HID_UP_MSVENDOR 0xff000000 #define HID_UP_CUSTOM 0x00ff0000 #define HID_UP_LOGIVENDOR 0xffbc0000 +#define HID_UP_LOGIVENDOR2 0xff090000 +#define HID_UP_LOGIVENDOR3 0xff430000 #define HID_UP_LNVENDOR 0xffa00000 #define HID_UP_SENSOR 0x00200000 -- cgit v1.2.3 From 0b59733b95f9d7af6bee6e6a4d0d444eb694c514 Mon Sep 17 00:00:00 2001 From: Javier Gonzalez Date: Fri, 20 Nov 2015 13:47:56 +0100 Subject: lightnvm: keep track of block counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintain number of in use blocks, free blocks, and bad blocks in a per lun basis. This allows the upper layers to get information about the state of each lun. Also, account for blocks reserved to the device on the free block count. nr_free_blocks matches now the actual number of blocks on the free list when the device is booted. Signed-off-by: Javier Gonzalez Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cbe288acb1de..831a20cf070c 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -213,7 +213,9 @@ struct nvm_lun { int lun_id; int chnl_id; + unsigned int nr_inuse_blocks; /* Number of used blocks */ unsigned int nr_free_blocks; /* Number of unused blocks */ + unsigned int nr_bad_blocks; /* Number of bad blocks */ struct nvm_block *blocks; spinlock_t lock; -- cgit v1.2.3 From 2fde0e482db2b43bb4ed0e9aebfbe78ebcbbf5a6 Mon Sep 17 00:00:00 2001 From: Javier Gonzalez Date: Fri, 20 Nov 2015 13:47:57 +0100 Subject: lightnvm: add free and bad lun info to show luns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add free block, used block, and bad block information to the show debug interface. This information is used to debug how targets track blocks. Also, change debug function name to make it more generic. Signed-off-by: Javier Gonzalez Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 831a20cf070c..3db5552b17d5 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -380,7 +380,7 @@ typedef int (nvmm_end_io_fn)(struct nvm_rq *, int); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, unsigned long); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); -typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *); +typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); struct nvmm_type { const char *name; @@ -404,7 +404,7 @@ struct nvmm_type { nvmm_get_lun_fn *get_lun; /* Statistics */ - nvmm_free_blocks_print_fn *free_blocks_print; + nvmm_lun_info_print_fn *lun_info_print; struct list_head list; }; -- cgit v1.2.3 From 94a58c360a45c066ab5472cfd2bf2a4ba63aa532 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 20 Nov 2015 15:56:48 -0800 Subject: slab.h: sprinkle __assume_aligned attributes The various allocators return aligned memory. Telling the compiler that allows it to generate better code in many cases, for example when the return value is immediately passed to memset(). Some code does become larger, but at least we win twice as much as we lose: $ scripts/bloat-o-meter /tmp/vmlinux vmlinux add/remove: 0/0 grow/shrink: 13/52 up/down: 995/-2140 (-1145) An example of the different (and smaller) code can be seen in mm_alloc(). Before: : 48 8d 78 08 lea 0x8(%rax),%rdi : 48 89 c1 mov %rax,%rcx : 48 89 c2 mov %rax,%rdx : 48 c7 00 00 00 00 00 movq $0x0,(%rax) : 48 c7 80 48 03 00 00 movq $0x0,0x348(%rax) : 00 00 00 00 : 31 c0 xor %eax,%eax : 48 83 e7 f8 and $0xfffffffffffffff8,%rdi : 48 29 f9 sub %rdi,%rcx : 81 c1 50 03 00 00 add $0x350,%ecx : c1 e9 03 shr $0x3,%ecx : f3 48 ab rep stos %rax,%es:(%rdi) After: : 48 89 c2 mov %rax,%rdx : b9 6a 00 00 00 mov $0x6a,%ecx : 31 c0 xor %eax,%eax : 48 89 d7 mov %rdx,%rdi : f3 48 ab rep stos %rax,%es:(%rdi) So gcc's strategy is to do two possibly (but not really, of course) unaligned stores to the first and last word, then do an aligned rep stos covering the middle part with a little overlap. Maybe arches which do not allow unaligned stores gain even more. I don't know if gcc can actually make use of alignments greater than 8 for anything, so one could probably drop the __assume_xyz_alignment macros and just use __assume_aligned(8). The increases in code size are mostly caused by gcc deciding to opencode strlen() using the check-four-bytes-at-a-time trick when it knows the buffer is sufficiently aligned (one function grew by 200 bytes). Now it turns out that many of these strlen() calls showing up were in fact redundant, and they're gone from -next. Applying the two patches to next-20151001 bloat-o-meter instead says add/remove: 0/0 grow/shrink: 6/52 up/down: 244/-2140 (-1896) Signed-off-by: Rasmus Villemoes Acked-by: Christoph Lameter Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7c82e3b307a3..96940772bb92 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -157,6 +157,24 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #endif +/* + * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. + * Intended for arches that get misalignment faults even for 64 bit integer + * aligned buffers. + */ +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + +/* + * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned + * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN + * aligned pointers. + */ +#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) +#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) +#define __assume_page_alignment __assume_aligned(PAGE_SIZE) + /* * Kmalloc array related definitions */ @@ -286,8 +304,8 @@ static __always_inline int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags); -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags); +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment; +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment; void kmem_cache_free(struct kmem_cache *, void *); /* @@ -301,8 +319,8 @@ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node); -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { @@ -316,12 +334,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t); +extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment; #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size); + int node, size_t size) __assume_slab_alignment; #else static __always_inline void * kmem_cache_alloc_node_trace(struct kmem_cache *s, @@ -354,10 +372,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #else static __always_inline void * kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) @@ -482,15 +500,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) return __kmalloc_node(size, flags, node); } -/* - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. - * Intended for arches that get misalignment faults even for 64 bit integer - * aligned buffers. - */ -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - struct memcg_cache_array { struct rcu_head rcu; struct kmem_cache *entries[0]; -- cgit v1.2.3 From 5cf6a51e6062afe7cc507f32f1e5f7e6497ae844 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 20 Nov 2015 15:56:53 -0800 Subject: configfs: allow dynamic group creation This patchset introduces IIO software triggers, offers a way of configuring them via configfs and adds the IIO hrtimer based interrupt source to be used with software triggers. The architecture is now split in 3 parts, to remove all IIO trigger specific parts from IIO configfs core: (1) IIO configfs - creates the root of the IIO configfs subsys. (2) IIO software triggers - software trigger implementation, dynamically creating /config/iio/triggers group. (3) IIO hrtimer trigger - is the first interrupt source for software triggers (with syfs to follow). Each trigger type can implement its own set of attributes. Lockdep seems to be happy with the locking in configfs patch. This patch (of 5): We don't want to hardcode default groups at subsystem creation time. We export: * configfs_register_group * configfs_unregister_group to allow drivers to programatically create/destroy groups later, after module init time. This is needed for IIO configfs support. (akpm: the other 4 patches to be merged via the IIO tree) Signed-off-by: Daniel Baluta Suggested-by: Lars-Peter Clausen Reviewed-by: Christoph Hellwig Acked-by: Joel Becker Cc: Hartmut Knaack Cc: Octavian Purdila Cc: Paul Bolle Cc: Adriana Reus Cc: Cristina Opriceana Cc: Peter Meerwald Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/configfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index a8a335b7fce0..758a029011b1 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -197,6 +197,16 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro int configfs_register_subsystem(struct configfs_subsystem *subsys); void configfs_unregister_subsystem(struct configfs_subsystem *subsys); +int configfs_register_group(struct config_group *parent_group, + struct config_group *group); +void configfs_unregister_group(struct config_group *group); + +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type); +void configfs_unregister_default_group(struct config_group *group); + /* These functions can sleep and can alloc with GFP_KERNEL */ /* WARNING: These cannot be called underneath configfs callbacks!! */ int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); -- cgit v1.2.3 From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 20 Nov 2015 15:57:21 -0800 Subject: kernel/signal.c: unexport sigsuspend() sigsuspend() is nowhere used except in signal.c itself, so we can mark it static do not pollute the global namespace. But this patch is more than a boring cleanup patch, it fixes a real issue on UserModeLinux. UML has a special console driver to display ttys using xterm, or other terminal emulators, on the host side. Vegard reported that sometimes UML is unable to spawn a xterm and he's facing the following warning: WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0() It turned out that this warning makes absolutely no sense as the UML xterm code calls sigsuspend() on the host side, at least it tries. But as the kernel itself offers a sigsuspend() symbol the linker choose this one instead of the glibc wrapper. Interestingly this code used to work since ever but always blocked signals on the wrong side. Some recent kernel change made the WARN_ON() trigger and uncovered the bug. It is a wonderful example of how much works by chance on computers. :-) Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()") Signed-off-by: Richard Weinberger Reported-by: Vegard Nossum Tested-by: Vegard Nossum Acked-by: Oleg Nesterov Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ab1e0392b5ac..92557bbce7e7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -extern int sigsuspend(sigset_t *); struct sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION -- cgit v1.2.3 From 21fa8442799945beaca074cb5bcf7cfe24969d59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Nov 2015 15:57:32 -0800 Subject: mm: fix up sparse warning in gfpflags_allow_blocking sparse says: include/linux/gfp.h:274:26: warning: incorrect type in return expression (different base types) include/linux/gfp.h:274:26: expected bool include/linux/gfp.h:274:26: got restricted gfp_t ...add a forced cast to silence the warning. Signed-off-by: Jeff Layton Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6523109e136d..8942af0813e3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -271,7 +271,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { - return gfp_flags & __GFP_DIRECT_RECLAIM; + return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM -- cgit v1.2.3 From 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 08:52:31 -0500 Subject: tty: audit: Fix audit source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data to audit/record is in the 'from' buffer (ie., the input read buffer). Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode") Cc: stable # 4.1+ Cc: Miloslav Trmač Signed-off-by: Peter Hurley Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 5b04b0a5375b..5e31f1b99037 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -607,7 +607,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -615,8 +615,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) -- cgit v1.2.3 From 865762a8119e74b5f0e236d2d8eaaf8be9292a06 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:58 -0800 Subject: slab/slub: adjust kmem_cache_alloc_bulk API Adjust kmem_cache_alloc_bulk API before we have any real users. Adjust API to return type 'int' instead of previously type 'bool'. This is done to allow future extension of the bulk alloc API. A future extension could be to allow SLUB to stop at a page boundary, when specified by a flag, and then return the number of objects. The advantage of this approach, would make it easier to make bulk alloc run without local IRQs disabled. With an approach of cmpxchg "stealing" the entire c->freelist or page->freelist. To avoid overshooting we would stop processing at a slab-page boundary. Else we always end up returning some objects at the cost of another cmpxchg. To keep compatible with future users of this API linking against an older kernel when using the new flag, we need to return the number of allocated objects with this API change. Signed-off-by: Jesper Dangaard Brouer Cc: Vladimir Davydov Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 96940772bb92..2037a861e367 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -316,7 +316,7 @@ void kmem_cache_free(struct kmem_cache *, void *); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; -- cgit v1.2.3 From c86b3de8c8b02d7e474fdc002c8df533b844524c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Nov 2015 17:48:52 +0100 Subject: thermal: fix thermal_zone_bind_cooling_device prototype When the prototype for thermal_zone_bind_cooling_device changed, the static inline wrapper function was left alone, which in theory can cause build warnings: I have seen this error in the past: drivers/thermal/db8500_thermal.c: In function 'db8500_cdev_bind': drivers/thermal/db8500_thermal.c:78:9: error: too many arguments to function 'thermal_zone_bind_cooling_device' ret = thermal_zone_bind_cooling_device(thermal, i, cdev, while this one no longer shows up, there is no doubt that the prototype is still wrong, so let's just fix it anyway. Signed-off-by: Arnd Bergmann Fixes: 6cd9e9f629f1 ("thermal: of: fix cooling device weights in device tree") Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 4014a59828fc..613c29bd6baf 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -438,7 +438,8 @@ static inline void thermal_zone_device_unregister( static inline int thermal_zone_bind_cooling_device( struct thermal_zone_device *tz, int trip, struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower) + unsigned long upper, unsigned long lower, + unsigned int weight) { return -ENODEV; } static inline int thermal_zone_unbind_cooling_device( struct thermal_zone_device *tz, int trip, -- cgit v1.2.3 From 91ab4b4d16e6649fbbf65f303c0c4e20ed680bd1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 Nov 2015 14:30:26 -0500 Subject: nfs: use sliding delay when LAYOUTGET gets NFS4ERR_DELAY When LAYOUTGET gets NFS4ERR_DELAY, we currently will wait 15s before retrying the call. That is a _very_ long time, so add a timeout value to struct nfs4_layoutget and pass nfs4_async_handle_error a pointer to it. This allows the RPC engine to use a sliding delay window, instead of a 15s delay. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 570d630f98ae..11bbae44f4cb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -251,6 +251,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_res res; struct rpc_cred *cred; gfp_t gfp_flags; + long timeout; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.3 From fbc416ff86183e2203cdf975e2881d7c164b0271 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Nov 2015 12:12:21 +0100 Subject: arm64: fix building without CONFIG_UID16 As reported by Michal Simek, building an ARM64 kernel with CONFIG_UID16 disabled currently fails because the system call table still needs to reference the individual function entry points that are provided by kernel/sys_ni.c in this case, and the declarations are hidden inside of #ifdef CONFIG_UID16: arch/arm64/include/asm/unistd32.h:57:8: error: 'sys_lchown16' undeclared here (not in a function) __SYSCALL(__NR_lchown, sys_lchown16) I believe this problem only exists on ARM64, because older architectures tend to not need declarations when their system call table is built in assembly code, while newer architectures tend to not need UID16 support. ARM64 only uses these system calls for compatibility with 32-bit ARM binaries. This changes the CONFIG_UID16 check into CONFIG_HAVE_UID16, which is set unconditionally on ARM64 with CONFIG_COMPAT, so we see the declarations whenever we need them, but otherwise the behavior is unchanged. Fixes: af1839eb4bd4 ("Kconfig: clean up the long arch list for the UID16 config option") Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- include/linux/syscalls.h | 2 +- include/linux/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a156b82dd14c..c2b66a277e98 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -524,7 +524,7 @@ asmlinkage long sys_chown(const char __user *filename, asmlinkage long sys_lchown(const char __user *filename, uid_t user, gid_t group); asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 asmlinkage long sys_chown16(const char __user *filename, old_uid_t user, old_gid_t group); asmlinkage long sys_lchown16(const char __user *filename, diff --git a/include/linux/types.h b/include/linux/types.h index 70d8500bddf1..70dd3dfde631 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -35,7 +35,7 @@ typedef __kernel_gid16_t gid16_t; typedef unsigned long uintptr_t; -#ifdef CONFIG_UID16 +#ifdef CONFIG_HAVE_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ typedef __kernel_old_uid_t old_uid_t; typedef __kernel_old_gid_t old_gid_t; -- cgit v1.2.3 From 7c7a0e945349a3d0d497d7f32db6ed33d4031110 Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni Date: Wed, 11 Nov 2015 09:12:25 +0800 Subject: ARM/PCI: Move align_resource function pointer to pci_host_bridge structure Commit b3a72384fe29 ("ARM/PCI: Replace pci_sys_data->align_resource with global function pointer") introduced an ARM-specific align_resource() function pointer. This is not portable to other arches and doesn't work for platforms with two different PCIe host bridge controllers. Move the function pointer to the pci_host_bridge structure so each host bridge driver can specify its own align_resource() function. Signed-off-by: Gabriele Paoloni Signed-off-by: Bjorn Helgaas Reviewed-by: Arnd Bergmann --- include/linux/pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e828e7b4afec..6ae25aae88fd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -412,9 +412,18 @@ struct pci_host_bridge { void (*release_fn)(struct pci_host_bridge *); void *release_data; unsigned int ignore_reset_delay:1; /* for entire hierarchy */ + /* Resource alignment requirements */ + resource_size_t (*align_resource)(struct pci_dev *dev, + const struct resource *res, + resource_size_t start, + resource_size_t size, + resource_size_t align); }; #define to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev) + +struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); + void pci_set_host_bridge_release(struct pci_host_bridge *bridge, void (*release_fn)(struct pci_host_bridge *), void *release_data); -- cgit v1.2.3 From 3a66d7dca186ebdef9b0bf55e216778fa598062c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Oct 2015 16:02:14 -0700 Subject: kref: Remove kref_put_spinlock_irqsave() The last user is gone. Hence remove this function. Signed-off-by: Bart Van Assche Cc: Greg Kroah-Hartman Cc: Christoph Hellwig Cc: Joern Engel Signed-off-by: Nicholas Bellinger --- include/linux/kref.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 484604d184be..e15828fd71f1 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -19,7 +19,6 @@ #include #include #include -#include struct kref { atomic_t refcount; @@ -99,38 +98,6 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) return kref_sub(kref, 1, release); } -/** - * kref_put_spinlock_irqsave - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the - * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. - * @lock: lock to take in release case - * - * Behaves identical to kref_put with one exception. If the reference count - * drops to zero, the lock will be taken atomically wrt dropping the reference - * count. The release function has to call spin_unlock() without _irqrestore. - */ -static inline int kref_put_spinlock_irqsave(struct kref *kref, - void (*release)(struct kref *kref), - spinlock_t *lock) -{ - unsigned long flags; - - WARN_ON(release == NULL); - if (atomic_add_unless(&kref->refcount, -1, 1)) - return 0; - spin_lock_irqsave(lock, flags); - if (atomic_dec_and_test(&kref->refcount)) { - release(kref); - local_irq_restore(flags); - return 1; - } - spin_unlock_irqrestore(lock, flags); - return 0; -} - static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) -- cgit v1.2.3 From d8ce9bf5551bfea431893bdd0a943f24a5170828 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 27 Dec 2015 17:25:20 +0800 Subject: HID: move to_hid_device() to hid.h to_hid_device() macro is defined in both hid-lg4ff.c and hid-logitech-hidpp.c. So I move it to include/linux/hid.h. Signed-off-by: Geliang Tang Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index a6d7a3fc2cb3..1472026367ed 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -565,6 +565,9 @@ struct hid_device { /* device report descriptor */ wait_queue_head_t debug_wait; }; +#define to_hid_device(pdev) \ + container_of(pdev, struct hid_device, dev) + static inline void *hid_get_drvdata(struct hid_device *hdev) { return dev_get_drvdata(&hdev->dev); -- cgit v1.2.3 From ba91a96718d17160890e161f702db6e60747248a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 27 Dec 2015 17:25:22 +0800 Subject: HID: add a new helper to_hid_driver() Add a new helper to_hid_driver() and use it in hid-core.c. Signed-off-by: Geliang Tang Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 1472026367ed..75b66eccc692 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -717,6 +717,9 @@ struct hid_driver { struct device_driver driver; }; +#define to_hid_driver(pdrv) \ + container_of(pdrv, struct hid_driver, driver) + /** * hid_ll_driver - low level driver callbacks * @start: called on probe to start the device -- cgit v1.2.3