From 0461a4149836c792d186027c8c859637a4cfb11a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Dec 2014 21:38:05 +0000 Subject: spi: Pump transfers inside calling context for spi_sync() If we are using the standard SPI message pump (which all drivers should be transitioning over to) then special case the message enqueue and instead of starting the worker thread to push messages to the hardware do so in the context of the caller if the controller is idle. This avoids a context switch in the common case where the controller has a single user in a single thread, for short PIO transfers there may be no need to context switch away from the calling context to complete the transfer. The code is a bit more complex than is desirable in part due to the need to handle drivers not using the standard queue and in part due to handling the various combinations of bus locking and asynchronous submission in interrupt context. It is still suboptimal since it will still wake the message pump for each transfer in order to schedule idling of the hardware and if multiple contexts are using the controller simultaneously a caller may end up pumping a message for some random other thread rather than for itself, and if the thread ends up deferring due to another context idling the hardware then it will just busy wait. It can, however, have the benefit of aggregating power up and down of the hardware when a caller performs a series of transfers back to back without any need for the use of spi_async(). Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index a6ef2a8e6de4..4e6db75e9469 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -260,6 +260,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to syncronise access to message queue * @queue: message queue + * @idling: the device is entering idle state * @cur_msg: the currently in-flight message * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message @@ -425,6 +426,7 @@ struct spi_master { spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; + bool idling; bool busy; bool running; bool rt; -- cgit v1.2.3 From e39f2d5956999c05c85814787a113ffadbcd4b26 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Fri, 12 Dec 2014 10:17:26 -0800 Subject: HID: rmi: Scan the report descriptor to determine if the device is suitable for the hid-rmi driver On composite HID devices there may be multiple HID devices on separate interfaces, but hid-rmi should only bind to the touchpad. The previous version simply checked that the interface protocol was set to mouse. Unfortuately, it is not always the case that the touchpad has the mouse interface protocol set. This patch takes a different approach and scans the report descriptor looking for the Generic Desktop Pointer usage and the Vendor Specific Top Level Collection needed by the hid-rmi driver to interface with the device. Signed-off-by: Andrew Duggan Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 06c4607744f6..efc7787a41a8 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -574,7 +574,9 @@ static inline void hid_set_drvdata(struct hid_device *hdev, void *data) #define HID_GLOBAL_STACK_SIZE 4 #define HID_COLLECTION_STACK_SIZE 4 -#define HID_SCAN_FLAG_MT_WIN_8 0x00000001 +#define HID_SCAN_FLAG_MT_WIN_8 BIT(0) +#define HID_SCAN_FLAG_VENDOR_SPECIFIC BIT(1) +#define HID_SCAN_FLAG_GD_POINTER BIT(2) struct hid_parser { struct hid_global global; -- cgit v1.2.3 From d64cb71bede87dbca60d586a7bb4cef87fbe2731 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Wed, 17 Dec 2014 10:31:08 -0800 Subject: Input: add regulator haptic driver This change adds support for haptic driver controlled by voltage of a regulator. Userspace can control the device via Force Feedback interface from input framework. Signed-off-by: Jaewon Kim Signed-off-by: Hyunhee Kim Acked-by: Kyungmin Park Tested-by: Chanwoo Choi Reviewed-by: Chanwoo Choi Reviewed-by: Pankaj Dubey Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/regulator-haptic.h | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/platform_data/regulator-haptic.h (limited to 'include/linux') diff --git a/include/linux/platform_data/regulator-haptic.h b/include/linux/platform_data/regulator-haptic.h new file mode 100644 index 000000000000..5658e58e0738 --- /dev/null +++ b/include/linux/platform_data/regulator-haptic.h @@ -0,0 +1,29 @@ +/* + * Regulator Haptic Platform Data + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Jaewon Kim + * Author: Hyunhee Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _REGULATOR_HAPTIC_H +#define _REGULATOR_HAPTIC_H + +/* + * struct regulator_haptic_data - Platform device data + * + * @max_volt: maximum voltage value supplied to the haptic motor. + * + * @min_volt: minimum voltage value supplied to the haptic motor. + * + */ +struct regulator_haptic_data { + unsigned int max_volt; + unsigned int min_volt; +}; + +#endif /* _REGULATOR_HAPTIC_H */ -- cgit v1.2.3 From c5f4546593e9911800f0926c1090959b58bc5c93 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Tue, 16 Dec 2014 11:58:18 -0600 Subject: livepatch: kernel: add TAINT_LIVEPATCH This adds a new taint flag to indicate when the kernel or a kernel module has been live patched. This will provide a clean indication in bug reports that live patching was used. Additionally, if the crash occurs in a live patched function, the live patch module will appear beside the patched function in the backtrace. Signed-off-by: Seth Jennings Acked-by: Josh Poimboeuf Reviewed-by: Miroslav Benes Reviewed-by: Petr Mladek Reviewed-by: Masami Hiramatsu Signed-off-by: Jiri Kosina --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5449d2f4a1ef..d03e3deee091 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -471,6 +471,7 @@ extern enum system_states { #define TAINT_OOT_MODULE 12 #define TAINT_UNSIGNED_MODULE 13 #define TAINT_SOFTLOCKUP 14 +#define TAINT_LIVEPATCH 15 extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] -- cgit v1.2.3 From b700e7f03df5d92f85fa5247fe1f557528d3363d Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Tue, 16 Dec 2014 11:58:19 -0600 Subject: livepatch: kernel: add support for live patching This commit introduces code for the live patching core. It implements an ftrace-based mechanism and kernel interface for doing live patching of kernel and kernel module functions. It represents the greatest common functionality set between kpatch and kgraft and can accept patches built using either method. This first version does not implement any consistency mechanism that ensures that old and new code do not run together. In practice, ~90% of CVEs are safe to apply in this way, since they simply add a conditional check. However, any function change that can not execute safely with the old version of the function can _not_ be safely applied in this version. [ jkosina@suse.cz: due to the number of contributions that got folded into this original patch from Seth Jennings, add SUSE's copyright as well, as discussed via e-mail ] Signed-off-by: Seth Jennings Signed-off-by: Josh Poimboeuf Reviewed-by: Miroslav Benes Reviewed-by: Petr Mladek Reviewed-by: Masami Hiramatsu Signed-off-by: Miroslav Benes Signed-off-by: Petr Mladek Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 133 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 include/linux/livepatch.h (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h new file mode 100644 index 000000000000..950bc615842f --- /dev/null +++ b/include/linux/livepatch.h @@ -0,0 +1,133 @@ +/* + * livepatch.h - Kernel Live Patching Core + * + * Copyright (C) 2014 Seth Jennings + * Copyright (C) 2014 SUSE + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _LINUX_LIVEPATCH_H_ +#define _LINUX_LIVEPATCH_H_ + +#include +#include + +#if IS_ENABLED(CONFIG_LIVE_PATCHING) + +#include + +enum klp_state { + KLP_DISABLED, + KLP_ENABLED +}; + +/** + * struct klp_func - function structure for live patching + * @old_name: name of the function to be patched + * @new_func: pointer to the patched function code + * @old_addr: a hint conveying at what address the old function + * can be found (optional, vmlinux patches only) + * @kobj: kobject for sysfs resources + * @fops: ftrace operations structure + * @state: tracks function-level patch application state + */ +struct klp_func { + /* external */ + const char *old_name; + void *new_func; + /* + * The old_addr field is optional and can be used to resolve + * duplicate symbol names in the vmlinux object. If this + * information is not present, the symbol is located by name + * with kallsyms. If the name is not unique and old_addr is + * not provided, the patch application fails as there is no + * way to resolve the ambiguity. + */ + unsigned long old_addr; + + /* internal */ + struct kobject kobj; + struct ftrace_ops *fops; + enum klp_state state; +}; + +/** + * struct klp_reloc - relocation structure for live patching + * @loc: address where the relocation will be written + * @val: address of the referenced symbol (optional, + * vmlinux patches only) + * @type: ELF relocation type + * @name: name of the referenced symbol (for lookup/verification) + * @addend: offset from the referenced symbol + * @external: symbol is either exported or within the live patch module itself + */ +struct klp_reloc { + unsigned long loc; + unsigned long val; + unsigned long type; + const char *name; + int addend; + int external; +}; + +/** + * struct klp_object - kernel object structure for live patching + * @name: module name (or NULL for vmlinux) + * @relocs: relocation entries to be applied at load time + * @funcs: function entries for functions to be patched in the object + * @kobj: kobject for sysfs resources + * @mod: kernel module associated with the patched object + * (NULL for vmlinux) + * @state: tracks object-level patch application state + */ +struct klp_object { + /* external */ + const char *name; + struct klp_reloc *relocs; + struct klp_func *funcs; + + /* internal */ + struct kobject *kobj; + struct module *mod; + enum klp_state state; +}; + +/** + * struct klp_patch - patch structure for live patching + * @mod: reference to the live patch module + * @objs: object entries for kernel objects to be patched + * @list: list node for global list of registered patches + * @kobj: kobject for sysfs resources + * @state: tracks patch-level application state + */ +struct klp_patch { + /* external */ + struct module *mod; + struct klp_object *objs; + + /* internal */ + struct list_head list; + struct kobject kobj; + enum klp_state state; +}; + +extern int klp_register_patch(struct klp_patch *); +extern int klp_unregister_patch(struct klp_patch *); +extern int klp_enable_patch(struct klp_patch *); +extern int klp_disable_patch(struct klp_patch *); + +#endif /* CONFIG_LIVE_PATCHING */ + +#endif /* _LINUX_LIVEPATCH_H_ */ -- cgit v1.2.3 From 2c658e212ce7e40ace56d9441c8c5634d4d420e3 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 18 Dec 2014 16:12:08 +0200 Subject: spi: Remove FSF mailing addresses Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- include/linux/spi/at86rf230.h | 4 ---- include/linux/spi/l4f00242t03.h | 4 ---- include/linux/spi/lms283gf05.h | 4 ---- include/linux/spi/mxs-spi.h | 4 ---- include/linux/spi/pxa2xx_spi.h | 4 ---- include/linux/spi/rspi.h | 5 ----- include/linux/spi/sh_hspi.h | 4 ---- include/linux/spi/spi.h | 4 ---- include/linux/spi/tle62x0.h | 4 ---- include/linux/spi/tsc2005.h | 5 ----- 10 files changed, 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index b2b1afbb3202..cd519a11c2c6 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -12,10 +12,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * * Written by: * Dmitry Eremin-Solenikov */ diff --git a/include/linux/spi/l4f00242t03.h b/include/linux/spi/l4f00242t03.h index bc8677c8eba9..e69e9b51b21a 100644 --- a/include/linux/spi/l4f00242t03.h +++ b/include/linux/spi/l4f00242t03.h @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _INCLUDE_LINUX_SPI_L4F00242T03_H_ diff --git a/include/linux/spi/lms283gf05.h b/include/linux/spi/lms283gf05.h index 555d254e6606..fdd1d1d51da5 100644 --- a/include/linux/spi/lms283gf05.h +++ b/include/linux/spi/lms283gf05.h @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _INCLUDE_LINUX_SPI_LMS283GF05_H_ diff --git a/include/linux/spi/mxs-spi.h b/include/linux/spi/mxs-spi.h index 4835486f58e5..381d368b91b4 100644 --- a/include/linux/spi/mxs-spi.h +++ b/include/linux/spi/mxs-spi.h @@ -15,10 +15,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef __LINUX_SPI_MXS_SPI_H__ diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index d5a316550177..5eb56e35cf0d 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __linux_pxa2xx_spi_h #define __linux_pxa2xx_spi_h diff --git a/include/linux/spi/rspi.h b/include/linux/spi/rspi.h index e546b2ceb623..a693188cc08b 100644 --- a/include/linux/spi/rspi.h +++ b/include/linux/spi/rspi.h @@ -11,11 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef __LINUX_SPI_RENESAS_SPI_H__ diff --git a/include/linux/spi/sh_hspi.h b/include/linux/spi/sh_hspi.h index a1121f872ac1..aa0d440ab4f0 100644 --- a/include/linux/spi/sh_hspi.h +++ b/include/linux/spi/sh_hspi.h @@ -9,10 +9,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef SH_HSPI_H #define SH_HSPI_H diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index a6ef2a8e6de4..6e2664a17114 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __LINUX_SPI_H diff --git a/include/linux/spi/tle62x0.h b/include/linux/spi/tle62x0.h index 60b59187e590..414c6fddfcf0 100644 --- a/include/linux/spi/tle62x0.h +++ b/include/linux/spi/tle62x0.h @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ struct tle62x0_pdata { diff --git a/include/linux/spi/tsc2005.h b/include/linux/spi/tsc2005.h index 8f721e465e05..563b3b1799a8 100644 --- a/include/linux/spi/tsc2005.h +++ b/include/linux/spi/tsc2005.h @@ -12,11 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #ifndef _LINUX_SPI_TSC2005_H -- cgit v1.2.3 From 556d2f055bf6d79ce81587dfe774d4dd10da473f Mon Sep 17 00:00:00 2001 From: Yalin Wang Date: Mon, 3 Nov 2014 03:01:03 +0100 Subject: ARM: 8187/1: add CONFIG_HAVE_ARCH_BITREVERSE to support rbit instruction this change add CONFIG_HAVE_ARCH_BITREVERSE config option, so that we can use some architecture's bitrev hardware instruction to do bitrev operation. Introduce __constant_bitrev* macro for constant bitrev operation. Change __bitrev16() __bitrev32() to be inline function, don't need export symbol for these tiny functions. Signed-off-by: Yalin Wang Acked-by: Will Deacon Signed-off-by: Russell King --- include/linux/bitrev.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index 7ffe03f4693d..fb790b8449c1 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -3,14 +3,83 @@ #include -extern u8 const byte_rev_table[256]; +#ifdef CONFIG_HAVE_ARCH_BITREVERSE +#include + +#define __bitrev32 __arch_bitrev32 +#define __bitrev16 __arch_bitrev16 +#define __bitrev8 __arch_bitrev8 -static inline u8 bitrev8(u8 byte) +#else +extern u8 const byte_rev_table[256]; +static inline u8 __bitrev8(u8 byte) { return byte_rev_table[byte]; } -extern u16 bitrev16(u16 in); -extern u32 bitrev32(u32 in); +static inline u16 __bitrev16(u16 x) +{ + return (__bitrev8(x & 0xff) << 8) | __bitrev8(x >> 8); +} + +static inline u32 __bitrev32(u32 x) +{ + return (__bitrev16(x & 0xffff) << 16) | __bitrev16(x >> 16); +} + +#endif /* CONFIG_HAVE_ARCH_BITREVERSE */ + +#define __constant_bitrev32(x) \ +({ \ + u32 __x = x; \ + __x = (__x >> 16) | (__x << 16); \ + __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \ + __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ + __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ + __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ + __x; \ +}) + +#define __constant_bitrev16(x) \ +({ \ + u16 __x = x; \ + __x = (__x >> 8) | (__x << 8); \ + __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \ + __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \ + __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \ + __x; \ +}) + +#define __constant_bitrev8(x) \ +({ \ + u8 __x = x; \ + __x = (__x >> 4) | (__x << 4); \ + __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \ + __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \ + __x; \ +}) + +#define bitrev32(x) \ +({ \ + u32 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev32(__x) : \ + __bitrev32(__x); \ +}) + +#define bitrev16(x) \ +({ \ + u16 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev16(__x) : \ + __bitrev16(__x); \ + }) +#define bitrev8(x) \ +({ \ + u8 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev8(__x) : \ + __bitrev8(__x) ; \ + }) #endif /* _LINUX_BITREV_H */ -- cgit v1.2.3 From c4827bb859cbe8afad9287c9dd4e7162119d3d59 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 18 Dec 2014 15:04:21 +0200 Subject: spi: pxa2xx: Add definition for Intel Quark DDS_RATE register Intel Quark DDS_RATE register is defined only in register access macro. Add a definition for it to common SSP register definitions for preparing to cleanup those macros. Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 77aed9ea1d26..dab545bb66b3 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -37,6 +37,7 @@ #define SSDR (0x10) /* SSP Data Write/Data Read Register */ #define SSTO (0x28) /* SSP Time Out Register */ +#define DDS_RATE (0x28) /* SSP DDS Clock Rate Register (Intel Quark) */ #define SSPSP (0x2C) /* SSP Programmable Serial Protocol */ #define SSTSA (0x30) /* SSP Tx Timeslot Active */ #define SSRSA (0x34) /* SSP Rx Timeslot Active */ -- cgit v1.2.3 From 32d17597d3e299ffe8b07e3afc12f8074e7ae483 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 9 Apr 2014 08:13:18 -0300 Subject: [media] v4l: vsp1: Remove support for platform data Now that all platforms instantiate the VSP1 through DT, platform data support isn't needed anymore. Signed-off-by: Laurent Pinchart Acked-by: Simon Horman Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/vsp1.h | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 include/linux/platform_data/vsp1.h (limited to 'include/linux') diff --git a/include/linux/platform_data/vsp1.h b/include/linux/platform_data/vsp1.h deleted file mode 100644 index 63170e2614b3..000000000000 --- a/include/linux/platform_data/vsp1.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * vsp1.h -- R-Car VSP1 Platform Data - * - * Copyright (C) 2013 Renesas Corporation - * - * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ -#ifndef __PLATFORM_VSP1_H__ -#define __PLATFORM_VSP1_H__ - -#define VSP1_HAS_LIF (1 << 0) -#define VSP1_HAS_LUT (1 << 1) -#define VSP1_HAS_SRU (1 << 2) - -struct vsp1_platform_data { - unsigned int features; - unsigned int rpf_count; - unsigned int uds_count; - unsigned int wpf_count; -}; - -#endif /* __PLATFORM_VSP1_H__ */ -- cgit v1.2.3 From 3110628d89f80fbafa085fd62e75afcb39fb764c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 19 Dec 2014 17:15:53 +0900 Subject: spi: sh-msiof: Configure MSIOF sync signal timing in device tree The MSIOF controller has DTDL and SYNCDL in SITMDR1 register. So, this patch adds new properties like the following commit: d0fb47a5237d8b9576113568bacfd27892308b62 (spi: fsl-espi: Configure FSL eSPI CSBEF and CSAFT) Signed-off-by: Yoshihiro Shimoda Signed-off-by: Mark Brown --- include/linux/spi/sh_msiof.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index 88a14d81c49e..b087a85f5f72 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -7,6 +7,8 @@ struct sh_msiof_spi_info { u16 num_chipselect; unsigned int dma_tx_id; unsigned int dma_rx_id; + u32 dtdl; + u32 syncdl; }; #endif /* __SPI_SH_MSIOF_H__ */ -- cgit v1.2.3 From 0425e2420c0ab1b5da24f6d9fce39241ad85fc46 Mon Sep 17 00:00:00 2001 From: Flora Fu Date: Fri, 5 Dec 2014 12:07:54 +0800 Subject: regulator: mt6397: Add support for MT6397 regulator Add MT6397 regulator driver. Signed-off-by: Flora Fu Signed-off-by: Mark Brown --- include/linux/regulator/mt6397-regulator.h | 49 ++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 include/linux/regulator/mt6397-regulator.h (limited to 'include/linux') diff --git a/include/linux/regulator/mt6397-regulator.h b/include/linux/regulator/mt6397-regulator.h new file mode 100644 index 000000000000..30cc5963e265 --- /dev/null +++ b/include/linux/regulator/mt6397-regulator.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2014 MediaTek Inc. + * Author: Flora Fu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_REGULATOR_MT6397_H +#define __LINUX_REGULATOR_MT6397_H + +enum { + MT6397_ID_VPCA15 = 0, + MT6397_ID_VPCA7, + MT6397_ID_VSRAMCA15, + MT6397_ID_VSRAMCA7, + MT6397_ID_VCORE, + MT6397_ID_VGPU, + MT6397_ID_VDRM, + MT6397_ID_VIO18 = 7, + MT6397_ID_VTCXO, + MT6397_ID_VA28, + MT6397_ID_VCAMA, + MT6397_ID_VIO28, + MT6397_ID_VUSB, + MT6397_ID_VMC, + MT6397_ID_VMCH, + MT6397_ID_VEMC3V3, + MT6397_ID_VGP1, + MT6397_ID_VGP2, + MT6397_ID_VGP3, + MT6397_ID_VGP4, + MT6397_ID_VGP5, + MT6397_ID_VGP6, + MT6397_ID_VIBR, + MT6397_ID_RG_MAX, +}; + +#define MT6397_MAX_REGULATOR MT6397_ID_RG_MAX +#define MT6397_REGULATOR_ID97 0x97 +#define MT6397_REGULATOR_ID91 0x91 + +#endif /* __LINUX_REGULATOR_MT6397_H */ -- cgit v1.2.3 From 0628ee7c81a8ced9b10f9ee300707f7f79fdecf1 Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Sun, 21 Dec 2014 22:36:37 -0500 Subject: libata: s/ata_id_removeable()/ata_id_removable()/ Changes the spelling typos of removeable to removable where ata_id_removeable is defined in ata.h and called in libata-scsi.c respectively. Signed-off-by: Nicholas Krause Signed-off-by: Tejun Heo --- include/linux/ata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index f2f4d8da97c0..1648026e06b4 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -503,7 +503,7 @@ struct ata_bmdma_prd { #define ata_id_has_dma(id) ((id)[ATA_ID_CAPABILITY] & (1 << 8)) #define ata_id_has_ncq(id) ((id)[ATA_ID_SATA_CAPABILITY] & (1 << 8)) #define ata_id_queue_depth(id) (((id)[ATA_ID_QUEUE_DEPTH] & 0x1f) + 1) -#define ata_id_removeable(id) ((id)[ATA_ID_CONFIG] & (1 << 7)) +#define ata_id_removable(id) ((id)[ATA_ID_CONFIG] & (1 << 7)) #define ata_id_has_atapi_AN(id) \ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ -- cgit v1.2.3 From 74d23cc704d19732e70ef1579a669f7d5f09dd9a Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 21 Dec 2014 19:46:56 +0100 Subject: time: move the timecounter/cyclecounter code into its own file. The timecounter code has almost nothing to do with the clocksource code. Let it live in its own file. This will help isolate the timecounter users from the clocksource users in the source tree. Signed-off-by: Richard Cochran Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/clocksource.h | 102 ------------------------------------ include/linux/mlx4/device.h | 2 +- include/linux/timecounter.h | 122 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/types.h | 3 ++ 4 files changed, 126 insertions(+), 103 deletions(-) create mode 100644 include/linux/timecounter.h (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index abcafaa20b86..9c78d15d33e4 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -18,8 +18,6 @@ #include #include -/* clocksource cycle base type */ -typedef u64 cycle_t; struct clocksource; struct module; @@ -27,106 +25,6 @@ struct module; #include #endif -/** - * struct cyclecounter - hardware abstraction for a free running counter - * Provides completely state-free accessors to the underlying hardware. - * Depending on which hardware it reads, the cycle counter may wrap - * around quickly. Locking rules (if necessary) have to be defined - * by the implementor and user of specific instances of this API. - * - * @read: returns the current cycle value - * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, - * see CLOCKSOURCE_MASK() helper macro - * @mult: cycle to nanosecond multiplier - * @shift: cycle to nanosecond divisor (power of two) - */ -struct cyclecounter { - cycle_t (*read)(const struct cyclecounter *cc); - cycle_t mask; - u32 mult; - u32 shift; -}; - -/** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds - * Contains the state needed by timecounter_read() to detect - * cycle counter wrap around. Initialize with - * timecounter_init(). Also used to convert cycle counts into the - * corresponding nanosecond counts with timecounter_cyc2time(). Users - * of this code are responsible for initializing the underlying - * cycle counter hardware, locking issues and reading the time - * more often than the cycle counter wraps around. The nanosecond - * counter will only wrap around after ~585 years. - * - * @cc: the cycle counter used by this instance - * @cycle_last: most recent cycle counter value seen by - * timecounter_read() - * @nsec: continuously increasing count - */ -struct timecounter { - const struct cyclecounter *cc; - cycle_t cycle_last; - u64 nsec; -}; - -/** - * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds - * @cc: Pointer to cycle counter. - * @cycles: Cycles - * - * XXX - This could use some mult_lxl_ll() asm optimization. Same code - * as in cyc2ns, but with unsigned result. - */ -static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, - cycle_t cycles) -{ - u64 ret = (u64)cycles; - ret = (ret * cc->mult) >> cc->shift; - return ret; -} - -/** - * timecounter_init - initialize a time counter - * @tc: Pointer to time counter which is to be initialized/reset - * @cc: A cycle counter, ready to be used. - * @start_tstamp: Arbitrary initial time stamp. - * - * After this call the current cycle register (roughly) corresponds to - * the initial time stamp. Every call to timecounter_read() increments - * the time stamp counter by the number of elapsed nanoseconds. - */ -extern void timecounter_init(struct timecounter *tc, - const struct cyclecounter *cc, - u64 start_tstamp); - -/** - * timecounter_read - return nanoseconds elapsed since timecounter_init() - * plus the initial time stamp - * @tc: Pointer to time counter. - * - * In other words, keeps track of time since the same epoch as - * the function which generated the initial time stamp. - */ -extern u64 timecounter_read(struct timecounter *tc); - -/** - * timecounter_cyc2time - convert a cycle counter to same - * time base as values returned by - * timecounter_read() - * @tc: Pointer to time counter. - * @cycle_tstamp: a value returned by tc->cc->read() - * - * Cycle counts that are converted correctly as long as they - * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], - * with "max cycle count" == cs->mask+1. - * - * This allows conversion of cycle counter values which were generated - * in the past. - */ -extern u64 timecounter_cyc2time(struct timecounter *tc, - cycle_t cycle_tstamp); - /** * struct clocksource - hardware abstraction for a free running counter * Provides mostly state-free accessors to the underlying hardware. diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 25c791e295fd..f1e41b33462f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -42,7 +42,7 @@ #include -#include +#include #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h new file mode 100644 index 000000000000..146f07a6651b --- /dev/null +++ b/include/linux/timecounter.h @@ -0,0 +1,122 @@ +/* + * linux/include/linux/timecounter.h + * + * based on code that migrated away from + * linux/include/linux/clocksource.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _LINUX_TIMECOUNTER_H +#define _LINUX_TIMECOUNTER_H + +#include + +/** + * struct cyclecounter - hardware abstraction for a free running counter + * Provides completely state-free accessors to the underlying hardware. + * Depending on which hardware it reads, the cycle counter may wrap + * around quickly. Locking rules (if necessary) have to be defined + * by the implementor and user of specific instances of this API. + * + * @read: returns the current cycle value + * @mask: bitmask for two's complement + * subtraction of non 64 bit counters, + * see CLOCKSOURCE_MASK() helper macro + * @mult: cycle to nanosecond multiplier + * @shift: cycle to nanosecond divisor (power of two) + */ +struct cyclecounter { + cycle_t (*read)(const struct cyclecounter *cc); + cycle_t mask; + u32 mult; + u32 shift; +}; + +/** + * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * Contains the state needed by timecounter_read() to detect + * cycle counter wrap around. Initialize with + * timecounter_init(). Also used to convert cycle counts into the + * corresponding nanosecond counts with timecounter_cyc2time(). Users + * of this code are responsible for initializing the underlying + * cycle counter hardware, locking issues and reading the time + * more often than the cycle counter wraps around. The nanosecond + * counter will only wrap around after ~585 years. + * + * @cc: the cycle counter used by this instance + * @cycle_last: most recent cycle counter value seen by + * timecounter_read() + * @nsec: continuously increasing count + */ +struct timecounter { + const struct cyclecounter *cc; + cycle_t cycle_last; + u64 nsec; +}; + +/** + * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds + * @cc: Pointer to cycle counter. + * @cycles: Cycles + * + * XXX - This could use some mult_lxl_ll() asm optimization. Same code + * as in cyc2ns, but with unsigned result. + */ +static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, + cycle_t cycles) +{ + u64 ret = (u64)cycles; + ret = (ret * cc->mult) >> cc->shift; + return ret; +} + +/** + * timecounter_init - initialize a time counter + * @tc: Pointer to time counter which is to be initialized/reset + * @cc: A cycle counter, ready to be used. + * @start_tstamp: Arbitrary initial time stamp. + * + * After this call the current cycle register (roughly) corresponds to + * the initial time stamp. Every call to timecounter_read() increments + * the time stamp counter by the number of elapsed nanoseconds. + */ +extern void timecounter_init(struct timecounter *tc, + const struct cyclecounter *cc, + u64 start_tstamp); + +/** + * timecounter_read - return nanoseconds elapsed since timecounter_init() + * plus the initial time stamp + * @tc: Pointer to time counter. + * + * In other words, keeps track of time since the same epoch as + * the function which generated the initial time stamp. + */ +extern u64 timecounter_read(struct timecounter *tc); + +/** + * timecounter_cyc2time - convert a cycle counter to same + * time base as values returned by + * timecounter_read() + * @tc: Pointer to time counter. + * @cycle_tstamp: a value returned by tc->cc->read() + * + * Cycle counts that are converted correctly as long as they + * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], + * with "max cycle count" == cs->mask+1. + * + * This allows conversion of cycle counter values which were generated + * in the past. + */ +extern u64 timecounter_cyc2time(struct timecounter *tc, + cycle_t cycle_tstamp); + +#endif diff --git a/include/linux/types.h b/include/linux/types.h index a0bb7048687f..62323825cff9 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -213,5 +213,8 @@ struct callback_head { }; #define rcu_head callback_head +/* clocksource cycle base type */ +typedef u64 cycle_t; + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ -- cgit v1.2.3 From 796c1efd6fa0ed696d550b68f4410ab1a1749d01 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 21 Dec 2014 19:46:57 +0100 Subject: timecounter: provide a helper function to shift the time. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some PTP Hardware Clock drivers use a struct timecounter to represent their clock. To adjust the time by a given offset, these drivers all perform a two step read/write of their timecounter. However, it is better and simpler just to adjust the offset in one step. This patch introduces a little routine to help drivers implement the adjtime method. Suggested-by: Janusz Użycki Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/timecounter.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index 146f07a6651b..af3dfa4e90f0 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -78,6 +78,15 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, return ret; } +/** + * timecounter_adjtime - Shifts the time of the clock. + * @delta: Desired change in nanoseconds. + */ +static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) +{ + tc->nsec += delta; +} + /** * timecounter_init - initialize a time counter * @tc: Pointer to time counter which is to be initialized/reset -- cgit v1.2.3 From 2eebdde6528a722fbf8e2cffcf7aa52cbb4c2de0 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 21 Dec 2014 19:47:06 +0100 Subject: timecounter: keep track of accumulated fractional nanoseconds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current timecounter implementation will drop a variable amount of resolution, depending on the magnitude of the time delta. In other words, reading the clock too often or too close to a time stamp conversion will introduce errors into the time values. This patch fixes the issue by introducing a fractional nanosecond field that accumulates the low order bits. Reported-by: Janusz Użycki Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/timecounter.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index af3dfa4e90f0..74f45496e6d1 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -55,27 +55,32 @@ struct cyclecounter { * @cycle_last: most recent cycle counter value seen by * timecounter_read() * @nsec: continuously increasing count + * @mask: bit mask for maintaining the 'frac' field + * @frac: accumulated fractional nanoseconds */ struct timecounter { const struct cyclecounter *cc; cycle_t cycle_last; u64 nsec; + u64 mask; + u64 frac; }; /** * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds * @cc: Pointer to cycle counter. * @cycles: Cycles - * - * XXX - This could use some mult_lxl_ll() asm optimization. Same code - * as in cyc2ns, but with unsigned result. + * @mask: bit mask for maintaining the 'frac' field + * @frac: pointer to storage for the fractional nanoseconds. */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, - cycle_t cycles) + cycle_t cycles, u64 mask, u64 *frac) { - u64 ret = (u64)cycles; - ret = (ret * cc->mult) >> cc->shift; - return ret; + u64 ns = (u64) cycles; + + ns = (ns * cc->mult) + *frac; + *frac = ns & mask; + return ns >> cc->shift; } /** -- cgit v1.2.3 From dd450777990baae668c1143064f2f234dbab1b9b Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 24 Dec 2014 01:14:14 +0300 Subject: arm: sa1100: move irda header to linux/platform_data In the end asm/mach/irda.h header is not used by anybody except sa1100. Move the header to the platform data includes dir and rename it to irda-sa11x0.h. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- include/linux/platform_data/irda-sa11x0.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/platform_data/irda-sa11x0.h (limited to 'include/linux') diff --git a/include/linux/platform_data/irda-sa11x0.h b/include/linux/platform_data/irda-sa11x0.h new file mode 100644 index 000000000000..38f77b5e56cf --- /dev/null +++ b/include/linux/platform_data/irda-sa11x0.h @@ -0,0 +1,20 @@ +/* + * arch/arm/include/asm/mach/irda.h + * + * Copyright (C) 2004 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_ARM_MACH_IRDA_H +#define __ASM_ARM_MACH_IRDA_H + +struct irda_platform_data { + int (*startup)(struct device *); + void (*shutdown)(struct device *); + int (*set_power)(struct device *, unsigned int state); + void (*set_speed)(struct device *, unsigned int speed); +}; + +#endif -- cgit v1.2.3 From de40ed31b3c577cefd7b54972365a272ecbe9dd6 Mon Sep 17 00:00:00 2001 From: Nimrod Andy Date: Wed, 24 Dec 2014 17:30:39 +0800 Subject: net: fec: add Wake-on-LAN support Support for Wake-on-LAN using Magic Packet. ENET IP supports sleep mode in low power status, when system enter suspend status, Magic packet can wake up system even if all SOC clocks are gate. The patch doing below things: - flagging the device as a wakeup source for the system, as well as its Wake-on-LAN interrupt - prepare the hardware for entering WoL mode - add standard ethtool WOL interface - enable the ENET interrupt to wake us Tested on i.MX6q/dl sabresd, sabreauto boards, i.MX6SX arm2 boards. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- include/linux/fec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fec.h b/include/linux/fec.h index bcff455d1d53..1454a503622d 100644 --- a/include/linux/fec.h +++ b/include/linux/fec.h @@ -19,6 +19,7 @@ struct fec_platform_data { phy_interface_t phy; unsigned char mac[ETH_ALEN]; + void (*sleep_mode_enable)(int enabled); }; #endif -- cgit v1.2.3 From 9b174d88c257150562b0101fcc6cb6c3cb74275c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 30 Dec 2014 19:10:15 -0800 Subject: net: Add Transparent Ethernet Bridging GRO support. Currently the only tunnel protocol that supports GRO with encapsulated Ethernet is VXLAN. This pulls out the Ethernet code into a proper layer so that it can be used by other tunnel protocols such as GRE and Geneve. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 41c891d05f04..1d869d185a0d 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -52,6 +52,10 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +int eth_gro_complete(struct sk_buff *skb, int nhoff); + /* Reserved Ethernet Addresses per IEEE 802.1Q */ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; -- cgit v1.2.3 From 1891172aa5c32f08ad9931b794edd71e91a4a527 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 2 Jan 2015 20:22:03 +0100 Subject: timecounter: provide a macro to initialize the cyclecounter mask field. There is no need for users of the timecounter/cyclecounter code to include clocksource.h just for a single macro. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/timecounter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index 74f45496e6d1..4382035a75bb 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -19,6 +19,9 @@ #include +/* simplify initialization of mask field */ +#define CYCLECOUNTER_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) + /** * struct cyclecounter - hardware abstraction for a free running counter * Provides completely state-free accessors to the underlying hardware. @@ -29,7 +32,7 @@ * @read: returns the current cycle value * @mask: bitmask for two's complement * subtraction of non 64 bit counters, - * see CLOCKSOURCE_MASK() helper macro + * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) */ -- cgit v1.2.3 From 8d24c0b43125ec26cc80e04588477a9a2afc025c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:14 +0100 Subject: rhashtable: Do hashing inside of rhashtable_lookup_compare() Hash the key inside of rhashtable_lookup_compare() like rhashtable_lookup() does. This allows to simplify the hashing functions and keep them private. Signed-off-by: Thomas Graf Cc: netfilter-devel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index b93fd89b2e5e..1b51221c6bbd 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -96,9 +96,6 @@ static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); -u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len); -u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); - void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, @@ -111,7 +108,7 @@ int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(const struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, +void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); void rhashtable_destroy(const struct rhashtable *ht); -- cgit v1.2.3 From 88d6ed15acff1cb44b1d1f3c0a393b7f7744957a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:16 +0100 Subject: rhashtable: Convert bucket iterators to take table and index This patch is in preparation to introduce per bucket spinlocks. It extends all iterator macros to take the bucket table and bucket index. It also introduces a new rht_dereference_bucket() to handle protected accesses to buckets. It introduces a barrier() to the RCU iterators to the prevent the compiler from caching the first element. The lockdep verifier is introduced as stub which always succeeds and properly implement in the next patch when the locks are introduced. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 173 ++++++++++++++++++++++++++++++--------------- 1 file changed, 116 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1b51221c6bbd..b54e24a08806 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -87,11 +87,18 @@ struct rhashtable { #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) { return 1; } + +static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, + u32 hash) +{ + return 1; +} #endif /* CONFIG_PROVE_LOCKING */ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); @@ -119,92 +126,144 @@ void rhashtable_destroy(const struct rhashtable *ht); #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) -#define rht_entry(ptr, type, member) container_of(ptr, type, member) -#define rht_entry_safe(ptr, type, member) \ -({ \ - typeof(ptr) __ptr = (ptr); \ - __ptr ? rht_entry(__ptr, type, member) : NULL; \ -}) +#define rht_dereference_bucket(p, tbl, hash) \ + rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) -#define rht_next_entry_safe(pos, ht, member) \ -({ \ - pos ? rht_entry_safe(rht_dereference((pos)->member.next, ht), \ - typeof(*(pos)), member) : NULL; \ -}) +#define rht_dereference_bucket_rcu(p, tbl, hash) \ + rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + +#define rht_entry(tpos, pos, member) \ + ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) /** - * rht_for_each - iterate over hash chain - * @pos: &struct rhash_head to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable + * rht_for_each_continue - continue iterating over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index */ -#define rht_for_each(pos, head, ht) \ - for (pos = rht_dereference(head, ht); \ +#define rht_for_each_continue(pos, head, tbl, hash) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ pos; \ - pos = rht_dereference((pos)->next, ht)) + pos = rht_dereference_bucket((pos)->next, tbl, hash)) + +/** + * rht_for_each - iterate over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + */ +#define rht_for_each(pos, tbl, hash) \ + rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_continue - continue iterating over hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + */ +#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ + pos && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each_entry - iterate over hash chain of given type - * @pos: type * to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. */ -#define rht_for_each_entry(pos, head, ht, member) \ - for (pos = rht_entry_safe(rht_dereference(head, ht), \ - typeof(*(pos)), member); \ - pos; \ - pos = rht_next_entry_safe(pos, ht, member)) +#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type - * @pos: type * to use as a loop cursor. - * @n: type * to use for temporary next object storage - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @next: the &struct rhash_head to use as next in loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(pos, n, head, ht, member) \ - for (pos = rht_entry_safe(rht_dereference(head, ht), \ - typeof(*(pos)), member), \ - n = rht_next_entry_safe(pos, ht, member); \ - pos; \ - pos = n, \ - n = rht_next_entry_safe(pos, ht, member)) +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ + next = pos ? rht_dereference_bucket(pos->next, tbl, hash) \ + : NULL; \ + pos && rht_entry(tpos, pos, member); \ + pos = next) + +/** + * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos; \ + pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain - * @pos: &struct rhash_head to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu fkht mutation primitives such as rht_insert() as long as the + * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu(pos, head, ht) \ - for (pos = rht_dereference_rcu(head, ht); \ - pos; \ - pos = rht_dereference_rcu((pos)->next, ht)) +#define rht_for_each_rcu(pos, tbl, hash) \ + rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** * rht_for_each_entry_rcu - iterate over rcu hash chain of given type - * @pos: type * to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu fkht mutation primitives such as rht_insert() as long as the + * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(pos, head, member) \ - for (pos = rht_entry_safe(rcu_dereference_raw(head), \ - typeof(*(pos)), member); \ - pos; \ - pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \ - typeof(*(pos)), member)) +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ + tbl, hash, member) #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3 From 897362e446436d245972e72c6bc5b33bd7a5c659 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:18 +0100 Subject: nft_hash: Remove rhashtable_remove_pprev() The removal function of nft_hash currently stores a reference to the previous element during lookup which is used to optimize removal later on. This was possible because a lock is held throughout calling rhashtable_lookup() and rhashtable_remove(). With the introdution of deferred table resizing in parallel to lookups and insertions, the nftables lock will no longer synchronize all table mutations and the stored pprev may become invalid. Removing this optimization makes removal slightly more expensive on average but allows taking the resize cost out of the insert and remove path. Signed-off-by: Thomas Graf Cc: netfilter-devel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index b54e24a08806..f624d4b5045f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -105,8 +105,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); -void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev); bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); -- cgit v1.2.3 From 113948d841e8d78039e5dbbb5248f5b73e99eafa Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:19 +0100 Subject: spinlock: Add spin_lock_bh_nested() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/spinlock.h | 8 ++++++++ include/linux/spinlock_api_smp.h | 2 ++ include/linux/spinlock_api_up.h | 1 + 3 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 262ba4ef9a8e..3e18379dfa6f 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -190,6 +190,8 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock_nested(lock, subclass) +# define raw_spin_lock_bh_nested(lock, subclass) \ + _raw_spin_lock_bh_nested(lock, subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ @@ -205,6 +207,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) +# define raw_spin_lock_bh_nested(lock, subclass) _raw_spin_lock_bh(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -324,6 +327,11 @@ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ } while (0) +#define spin_lock_bh_nested(lock, subclass) \ +do { \ + raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\ +} while (0) + #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 42dfab89e740..5344268e6e62 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -22,6 +22,8 @@ int in_lock_functions(unsigned long addr); void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) __acquires(lock); +void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) + __acquires(lock); void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map) __acquires(lock); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d0d188861ad6..d3afef9d8dbe 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -57,6 +57,7 @@ #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) +#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) -- cgit v1.2.3 From 97defe1ecf868b8127f8e62395499d6a06e4c4b1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:20 +0100 Subject: rhashtable: Per bucket locks & deferred expansion/shrinking Introduces an array of spinlocks to protect bucket mutations. The number of spinlocks per CPU is configurable and selected based on the hash of the bucket. This allows for parallel insertions and removals of entries which do not share a lock. The patch also defers expansion and shrinking to a worker queue which allows insertion and removal from atomic context. Insertions and deletions may occur in parallel to it and are only held up briefly while the particular bucket is linked or unzipped. Mutations of the bucket table pointer is protected by a new mutex, read access is RCU protected. In the event of an expansion or shrinking, the new bucket table allocated is exposed as a so called future table as soon as the resize process starts. Lookups, deletions, and insertions will briefly use both tables. The future table becomes the main table after an RCU grace period and initial linking of the old to the new table was performed. Optimization of the chains to make use of the new number of buckets follows only the new table is in use. The side effect of this is that during that RCU grace period, a bucket traversal using any rht_for_each() variant on the main table will not see any insertions performed during the RCU grace period which would at that point land in the future table. The lookup will see them as it searches both tables if needed. Having multiple insertions and removals occur in parallel requires nelems to become an atomic counter. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f624d4b5045f..a1688f0a6193 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -19,6 +19,7 @@ #define _LINUX_RHASHTABLE_H #include +#include struct rhash_head { struct rhash_head __rcu *next; @@ -26,8 +27,17 @@ struct rhash_head { #define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) +/** + * struct bucket_table - Table of hash buckets + * @size: Number of hash buckets + * @locks_mask: Mask to apply before accessing locks[] + * @locks: Array of spinlocks protecting individual buckets + * @buckets: size * hash buckets + */ struct bucket_table { size_t size; + unsigned int locks_mask; + spinlock_t *locks; struct rhash_head __rcu *buckets[]; }; @@ -45,11 +55,11 @@ struct rhashtable; * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand * @shrink_decision: If defined, may return true if table should shrink - * @mutex_is_held: Must return true if protecting mutex is held */ struct rhashtable_params { size_t nelem_hint; @@ -59,37 +69,42 @@ struct rhashtable_params { u32 hash_rnd; size_t max_shift; size_t min_shift; + size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; bool (*grow_decision)(const struct rhashtable *ht, size_t new_size); bool (*shrink_decision)(const struct rhashtable *ht, size_t new_size); -#ifdef CONFIG_PROVE_LOCKING - int (*mutex_is_held)(void *parent); - void *parent; -#endif }; /** * struct rhashtable - Hash table handle * @tbl: Bucket table + * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table * @shift: Current size (1 << shift) * @p: Configuration parameters + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @being_destroyed: True if table is set up for destruction */ struct rhashtable { struct bucket_table __rcu *tbl; - size_t nelems; + struct bucket_table __rcu *future_tbl; + atomic_t nelems; size_t shift; struct rhashtable_params p; + struct delayed_work run_work; + struct mutex mutex; + bool being_destroyed; }; #ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else -static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) { return 1; } @@ -112,11 +127,11 @@ bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); -void *rhashtable_lookup(const struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, +void *rhashtable_lookup(struct rhashtable *ht, const void *key); +void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); -void rhashtable_destroy(const struct rhashtable *ht); +void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) -- cgit v1.2.3 From f89bd6f87a53ce5a7d60662429591ebac2745c10 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:21 +0100 Subject: rhashtable: Supports for nulls marker In order to allow for wider usage of rhashtable, use a special nulls marker to terminate each chain. The reason for not using the existing nulls_list is that the prev pointer usage would not be valid as entries can be linked in two different buckets at the same time. The 4 nulls base bits can be set through the rhashtable_params structure like this: struct rhashtable_params params = { [...] .nulls_base = (1U << RHT_BASE_SHIFT), }; This reduces the hash length from 32 bits to 27 bits. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 3 ++- include/linux/rhashtable.h | 57 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 5d10ae364b5e..e8c300e06438 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -21,8 +21,9 @@ struct hlist_nulls_head { struct hlist_nulls_node { struct hlist_nulls_node *next, **pprev; }; +#define NULLS_MARKER(value) (1UL | (((long)value) << 1)) #define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ - ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1))) + ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) /** diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a1688f0a6193..de7cac753b09 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,15 +18,32 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H -#include +#include #include +/* + * The end of the chain is marked with a special nulls marks which has + * the following format: + * + * +-------+-----------------------------------------------------+-+ + * | Base | Hash |1| + * +-------+-----------------------------------------------------+-+ + * + * Base (4 bits) : Reserved to distinguish between multiple tables. + * Specified via &struct rhashtable_params.nulls_base. + * Hash (27 bits): Full hash (unmasked) of first element added to bucket + * 1 (1 bit) : Nulls marker (always set) + * + * The remaining bits of the next pointer remain unused for now. + */ +#define RHT_BASE_BITS 4 +#define RHT_HASH_BITS 27 +#define RHT_BASE_SHIFT RHT_HASH_BITS + struct rhash_head { struct rhash_head __rcu *next; }; -#define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) - /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -55,6 +72,7 @@ struct rhashtable; * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object @@ -69,6 +87,7 @@ struct rhashtable_params { u32 hash_rnd; size_t max_shift; size_t min_shift; + u32 nulls_base; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -100,6 +119,24 @@ struct rhashtable { bool being_destroyed; }; +static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) +{ + return NULLS_MARKER(ht->p.nulls_base + hash); +} + +#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ + ((ptr) = (typeof(ptr)) rht_marker(ht, hash)) + +static inline bool rht_is_a_nulls(const struct rhash_head *ptr) +{ + return ((unsigned long) ptr & 1); +} + +static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) +{ + return ((unsigned long) ptr) >> 1; +} + #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -157,7 +194,7 @@ void rhashtable_destroy(struct rhashtable *ht); */ #define rht_for_each_continue(pos, head, tbl, hash) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ - pos; \ + !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** @@ -180,7 +217,7 @@ void rhashtable_destroy(struct rhashtable *ht); */ #define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ - pos && rht_entry(tpos, pos, member); \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** @@ -209,9 +246,9 @@ void rhashtable_destroy(struct rhashtable *ht); */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ - next = pos ? rht_dereference_bucket(pos->next, tbl, hash) \ - : NULL; \ - pos && rht_entry(tpos, pos, member); \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = next) /** @@ -228,7 +265,7 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_for_each_rcu_continue(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ - pos; \ + !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** @@ -260,7 +297,7 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ - pos && rht_entry(tpos, pos, member); \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** -- cgit v1.2.3 From 86b35b64ed7b6b38305dee67a0f2ddff2ca5455d Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sun, 4 Jan 2015 15:25:09 +0800 Subject: rhashtable: fix missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixup below build error: include/linux/rhashtable.h: At top level: include/linux/rhashtable.h:118:34: error: field ‘mutex’ has incomplete type Signed-off-by: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index de7cac753b09..de1459c74c4d 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -20,6 +20,7 @@ #include #include +#include /* * The end of the chain is marked with a special nulls marks which has -- cgit v1.2.3 From a3449ded128d037e6b2bd7a59b0741de56506066 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sun, 4 Jan 2015 15:24:35 +0800 Subject: list_nulls: fix missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixup below build error: include/linux/list_nulls.h: In function ‘hlist_nulls_del’: include/linux/list_nulls.h:84:13: error: ‘LIST_POISON2’ undeclared (first use in this function) Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index e8c300e06438..f266661d2666 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -1,6 +1,9 @@ #ifndef _LINUX_LIST_NULLS_H #define _LINUX_LIST_NULLS_H +#include +#include + /* * Special version of lists, where end of list is not a NULL pointer, * but a 'nulls' marker, which can have many different values. -- cgit v1.2.3 From 224d019c4fbba242041e9b25a926ba873b7da1e2 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:14 -0800 Subject: ip: Move checksum convert defines to inet Move convert_csum from udp_sock to inet_sock. This allows the possibility that we can use convert checksum for different types of sockets and also allows convert checksum to be enabled from inet layer (what we'll want to do when enabling IP_CHECKSUM cmsg). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index ee3277593222..247cfdcc4b08 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,11 +49,7 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - convert_csum:1;/* On receive, convert checksum - * unnecessary to checksum complete - * if possible. - */ + no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -102,16 +98,6 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -static inline void udp_set_convert_csum(struct sock *sk, bool val) -{ - udp_sk(sk)->convert_csum = val; -} - -static inline bool udp_get_convert_csum(struct sock *sk) -{ - return udp_sk(sk)->convert_csum; -} - #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) -- cgit v1.2.3 From 6810e4a394f9d781050107529b8d1465c00b7b13 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Jan 2015 10:26:10 -0500 Subject: percpu_ref: remove unnecessary ACCESS_ONCE() in percpu_ref_tryget_live() __ref_is_percpu() needs the implied ACCESS_ONCE() in lockless_dereference() on @ref->percpu_count_ptr because the value is tested for !__PERCPU_REF_ATOMIC, which may be set asynchronously, and then used as a pointer. If the compiler generates a separate fetch when using it as a pointer, __PERCPU_REF_ATOMIC may be set in between contaminating the pointer value. percpu_ref_tryget_live() also uses ACCESS_ONCE() to test __PERCPU_REF_DEAD; however, there's no reason for this. I just copied ACCESS_ONCE() usage blindly from __ref_is_percpu(). All it does is confusing people trying to understand what's going on. This patch removes the unnecessary ACCESS_ONCE() usage from percpu_ref_tryget_live() and adds a comment explaining why __ref_is_percpu() needs it. Signed-off-by: Tejun Heo Cc: Kent Overstreet --- include/linux/percpu-refcount.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b4337646388b..6a7a670366ab 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -128,8 +128,22 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) static inline bool __ref_is_percpu(struct percpu_ref *ref, unsigned long __percpu **percpu_countp) { - /* paired with smp_store_release() in percpu_ref_reinit() */ - unsigned long percpu_ptr = lockless_dereference(ref->percpu_count_ptr); + unsigned long percpu_ptr; + + /* + * The value of @ref->percpu_count_ptr is tested for + * !__PERCPU_REF_ATOMIC, which may be set asynchronously, and then + * used as a pointer. If the compiler generates a separate fetch + * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in + * between contaminating the pointer value, meaning that + * ACCESS_ONCE() is required when fetching it. + * + * Also, we need a data dependency barrier to be paired with + * smp_store_release() in __percpu_ref_switch_to_percpu(). + * + * Use lockless deref which contains both. + */ + percpu_ptr = lockless_dereference(ref->percpu_count_ptr); /* * Theoretically, the following could test just ATOMIC; however, @@ -233,7 +247,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_inc(*percpu_count); ret = true; - } else if (!(ACCESS_ONCE(ref->percpu_count_ptr) & __PERCPU_REF_DEAD)) { + } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { ret = atomic_long_inc_not_zero(&ref->count); } -- cgit v1.2.3 From 4c907baf36d8339f393bb576d0bab29194d0e6ad Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Jan 2015 10:26:10 -0500 Subject: percpu_ref: implement percpu_ref_is_dying() Implement percpu_ref_is_dying() which tests whether the ref is dying or dead. This is useful to determine the current state when a percpu_ref is used as a cyclic on/off switch via kill and reinit. Signed-off-by: Tejun Heo Cc: Kent Overstreet --- include/linux/percpu-refcount.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 6a7a670366ab..12c9b485beb7 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -294,6 +294,20 @@ static inline void percpu_ref_put(struct percpu_ref *ref) percpu_ref_put_many(ref, 1); } +/** + * percpu_ref_is_dying - test whether a percpu refcount is dying or dead + * @ref: percpu_ref to test + * + * Returns %true if @ref is dying or dead. + * + * This function is safe to call as long as @ref is between init and exit + * and the caller is responsible for synchronizing against state changes. + */ +static inline bool percpu_ref_is_dying(struct percpu_ref *ref) +{ + return ref->percpu_count_ptr & __PERCPU_REF_DEAD; +} + /** * percpu_ref_is_zero - test whether a percpu refcount reached zero * @ref: percpu_ref to test -- cgit v1.2.3 From 24dab7a7b3534ef40ecec20cfd7fb3ad99d9ff33 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Jan 2015 12:02:46 -0500 Subject: cgroup: reorder SUBSYS(blkio) in cgroup_subsys.h The scheduled cgroup writeback support requires blkio to be initialized before memcg as memcg needs to provide certain blkcg related functionalities. Relocate blkio so that it's right above memory. Signed-off-by: Tejun Heo --- include/linux/cgroup_subsys.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 98c4f9b12b03..e4a96fb14403 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -15,6 +15,10 @@ SUBSYS(cpu) SUBSYS(cpuacct) #endif +#if IS_ENABLED(CONFIG_BLK_CGROUP) +SUBSYS(blkio) +#endif + #if IS_ENABLED(CONFIG_MEMCG) SUBSYS(memory) #endif @@ -31,10 +35,6 @@ SUBSYS(freezer) SUBSYS(net_cls) #endif -#if IS_ENABLED(CONFIG_BLK_CGROUP) -SUBSYS(blkio) -#endif - #if IS_ENABLED(CONFIG_CGROUP_PERF) SUBSYS(perf_event) #endif -- cgit v1.2.3 From f3ba53802eff25e3eedb60d7afe5262710e20bd5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Jan 2015 12:02:46 -0500 Subject: cgroup: add dummy css_put() for !CONFIG_CGROUPS This will later be depended upon by the scheduled cgroup writeback support. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index da0dae0600e6..b9cb94c3102a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -943,6 +943,8 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, #else /* !CONFIG_CGROUPS */ +struct cgroup_subsys_state; + static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_fork(struct task_struct *p) {} @@ -955,6 +957,8 @@ static inline int cgroupstats_build(struct cgroupstats *stats, return -EINVAL; } +static inline void css_put(struct cgroup_subsys_state *css) {} + /* No cgroups - nothing to do */ static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) -- cgit v1.2.3 From 536fa402221f09633e7c5801b327055ab716a363 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Sep 2014 11:14:48 -0700 Subject: compiler: Allow 1- and 2-byte smp_load_acquire() and smp_store_release() CPUs without single-byte and double-byte loads and stores place some "interesting" requirements on concurrent code. For example (adapted from Peter Hurley's test code), suppose we have the following structure: struct foo { spinlock_t lock1; spinlock_t lock2; char a; /* Protected by lock1. */ char b; /* Protected by lock2. */ }; struct foo *foop; Of course, it is common (and good) practice to place data protected by different locks in separate cache lines. However, if the locks are rarely acquired (for example, only in rare error cases), and there are a great many instances of the data structure, then memory footprint can trump false-sharing concerns, so that it can be better to place them in the same cache cache line as above. But if the CPU does not support single-byte loads and stores, a store to foop->a will do a non-atomic read-modify-write operation on foop->b, which will come as a nasty surprise to someone holding foop->lock2. So we now require CPUs to support single-byte and double-byte loads and stores. Therefore, this commit adjusts the definition of __native_word() to allow these sizes to be used by smp_load_acquire() and smp_store_release(). Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a1c81f80978e..49811cdddaa5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -385,7 +385,7 @@ static __always_inline void __assign_once_size(volatile void *p, void *res, int /* Is this type a native word size -- useful for atomic operations */ #ifndef __native_word -# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #endif /* Compile time object size, -1 for unknown */ -- cgit v1.2.3 From ac59853c06993a442e8060bc19040b2ca3025aec Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Thu, 13 Nov 2014 14:24:14 -0500 Subject: rcupdate: Replace smp_read_barrier_depends() with lockless_dereference() Recently lockless_dereference() was added which can be used in place of hard-coding smp_read_barrier_depends(). The following PATCH makes the change. Signed-off-by: Pranith Kumar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ed4f5939a452..386ba288084a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -582,11 +582,11 @@ static inline void rcu_preempt_sleep_check(void) }) #define __rcu_dereference_check(p, c, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + /* Dependency order vs. p above. */ \ + typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ - ((typeof(*p) __force __kernel *)(_________p1)); \ + ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ @@ -603,10 +603,10 @@ static inline void rcu_preempt_sleep_check(void) }) #define __rcu_dereference_index_check(p, c) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + /* Dependency order vs. p above. */ \ + typeof(p) _________p1 = lockless_dereference(p); \ rcu_lockdep_assert(c, \ "suspicious rcu_dereference_index_check() usage"); \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) -- cgit v1.2.3 From f520c98e3e5212d8c282a86d9b7697dd70326192 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 12 Dec 2014 09:36:14 +0800 Subject: rculist: Fix sparse warning This fixes the following sparse warnings: make C=1 CF=-D__CHECK_ENDIAN__ net/ipv6/addrconf.o net/ipv6/addrconf.c:3495:9: error: incompatible types in comparison expression (different address spaces) net/ipv6/addrconf.c:3495:9: error: incompatible types in comparison expression (different address spaces) net/ipv6/addrconf.c:3495:9: error: incompatible types in comparison expression (different address spaces) net/ipv6/addrconf.c:3495:9: error: incompatible types in comparison expression (different address spaces) To silence these spare complaints, an RCU annotation should be added to "next" pointer of hlist_node structure through hlist_next_rcu() macro when iterating over a hlist with hlist_for_each_entry_continue_rcu_bh(). By the way, this commit also resolves the same error appearing in hlist_for_each_entry_continue_rcu(). Signed-off-by: Ying Xue Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 529bc946f450..a18b16f1dc0e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -524,11 +524,11 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu(pos, member) \ - for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ - typeof(*(pos)), member); \ + for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member); \ pos; \ - pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ - typeof(*(pos)), member)) + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point @@ -536,11 +536,11 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu_bh(pos, member) \ - for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ - typeof(*(pos)), member); \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member); \ pos; \ - pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ - typeof(*(pos)), member)) + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point -- cgit v1.2.3 From a5c198f4f7da6cc48116ca239c59c9f44b753364 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 23 Nov 2014 20:30:06 -0800 Subject: rcu: Expand SRCU ->completed to 64 bits When rcutorture used only the low-order 32 bits of the grace-period number, it was not a problem for SRCU to use a 32-bit completed field. However, rcutorture now uses the full 64 bits on 64-bit systems, so this commit converts SRCU's ->completed field to unsigned long so as to provide 64 bits on 64-bit systems. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index a2783cb5d275..ef923dd96249 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -45,7 +45,7 @@ struct rcu_batch { #define RCU_BATCH_INIT(name) { NULL, &(name.head) } struct srcu_struct { - unsigned completed; + unsigned long completed; struct srcu_struct_array __percpu *per_cpu_ref; spinlock_t queue_lock; /* protect ->batch_queue, ->running */ bool running; @@ -135,7 +135,7 @@ int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); void synchronize_srcu_expedited(struct srcu_struct *sp); -long srcu_batches_completed(struct srcu_struct *sp); +unsigned long srcu_batches_completed(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); #ifdef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.2.3 From 9735af5c78599703be633c057af3faee26482028 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 26 Nov 2014 10:42:50 -0800 Subject: rcu: Combine DEFINE_SRCU() and DEFINE_STATIC_SRCU() The DEFINE_SRCU() and DEFINE_STATIC_SRCU() definitions are quite similar, so this commit combines them, saving a bit of code and removing redundancy. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index ef923dd96249..9cfd9623fb03 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -102,13 +102,11 @@ void process_srcu(struct work_struct *work); * define and init a srcu struct at build time. * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it. */ -#define DEFINE_SRCU(name) \ +#define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ - struct srcu_struct name = __SRCU_STRUCT_INIT(name); - -#define DEFINE_STATIC_SRCU(name) \ - static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ - static struct srcu_struct name = __SRCU_STRUCT_INIT(name); + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) +#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) /** * call_srcu() - Queue a callback for invocation after an SRCU grace period -- cgit v1.2.3 From 8b3a38daff6f50027039d6979b9eb026907508eb Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Tue, 23 Dec 2014 19:04:23 +0100 Subject: brcmfmac: Add support for bcm43340/1 wireless chipsets This patch adds support for the bcm43340 and bcm43341 wireless chipsets. These two chipsets are identical from wireless parts perspective. As such they use the same firmware image. Cc: Samuel Ortiz Cc: Rob Herring Signed-off-by: John Stultz [arend@broadcom.com: squash to single commit, remove 43341 chipid] Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0f01fe065424..996807963716 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -24,13 +24,15 @@ * Vendors and devices. Sort key: vendor first, device next. */ #define SDIO_VENDOR_ID_BROADCOM 0x02d0 -#define SDIO_DEVICE_ID_BROADCOM_43143 43143 +#define SDIO_DEVICE_ID_BROADCOM_43143 0xa887 #define SDIO_DEVICE_ID_BROADCOM_43241 0x4324 #define SDIO_DEVICE_ID_BROADCOM_4329 0x4329 #define SDIO_DEVICE_ID_BROADCOM_4330 0x4330 #define SDIO_DEVICE_ID_BROADCOM_4334 0x4334 +#define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c +#define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 -#define SDIO_DEVICE_ID_BROADCOM_43362 43362 +#define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_VENDOR_ID_INTEL 0x0089 -- cgit v1.2.3 From 2f4383667d57d1c719070db86b14277277752841 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Fri, 2 Jan 2015 17:27:56 -0800 Subject: ethtool: Extend ethtool plugin module eeprom API to phylib This patch extends the ethtool plugin module eeprom API to support cards whose phy support is delegated to a separate driver. The handlers for ETHTOOL_GMODULEINFO and ETHTOOL_GMODULEEEPROM call the module_info and module_eeprom functions if the phy driver provides them; otherwise the handlers call the equivalent ethtool_ops functions provided by network drivers with built-in phy support. Signed-off-by: Ed Swierk Signed-off-by: David S. Miller --- include/linux/phy.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 22af8f8f5802..9c189a1fa3a2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -565,6 +565,15 @@ struct phy_driver { void (*write_mmd_indirect)(struct phy_device *dev, int ptrad, int devnum, int regnum, u32 val); + /* Get the size and type of the eeprom contained within a plug-in + * module */ + int (*module_info)(struct phy_device *dev, + struct ethtool_modinfo *modinfo); + + /* Get the eeprom information from the plug-in module */ + int (*module_eeprom)(struct phy_device *dev, + struct ethtool_eeprom *ee, u8 *data); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) -- cgit v1.2.3 From 9da7dae94fb8adab5cc5f395640e30736a66e910 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 6 Jan 2015 17:29:29 +0100 Subject: workqueue.h: remove loops of single statement macros checkpatch.pl complained about two single statement macros in do while (0) loops. The loops and the trailing semicolons are now removed, which makes checkpatch happy and the two macros consistent with the rest of the file. Signed-off-by: Valentin Rothberg Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b996e6cde6bb..74db135f9957 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -220,14 +220,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif #define INIT_WORK(_work, _func) \ - do { \ - __INIT_WORK((_work), (_func), 0); \ - } while (0) + __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ - do { \ - __INIT_WORK((_work), (_func), 1); \ - } while (0) + __INIT_WORK((_work), (_func), 1) #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ -- cgit v1.2.3 From 6ada1fc0e1c4775de0e043e1bd3ae9d065491aa5 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 Dec 2014 19:22:48 -0500 Subject: time: settimeofday: Validate the values of tv from user An unvalidated user input is multiplied by a constant, which can result in an undefined behaviour for large values. While this is validated later, we should avoid triggering undefined behaviour. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: stable Signed-off-by: Sasha Levin [jstultz: include trivial milisecond->microsecond correction noticed by Andy] Signed-off-by: John Stultz --- include/linux/time.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 8c42cf8d2444..5989b0ead1ec 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -99,6 +99,19 @@ static inline bool timespec_valid_strict(const struct timespec *ts) return true; } +static inline bool timeval_valid(const struct timeval *tv) +{ + /* Dates before 1970 are bogus */ + if (tv->tv_sec < 0) + return false; + + /* Can't have more microseconds then a second */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + + return true; +} + extern struct timespec timespec_trunc(struct timespec t, unsigned gran); #define CURRENT_TIME (current_kernel_time()) -- cgit v1.2.3 From 26e022727f5e88c6e5054e14d954425deacbe56a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 18 Dec 2014 16:02:17 +0100 Subject: efi: Rename efi_guid_unparse to efi_guid_to_str Call it what it does - "unparse" is plain-misleading. Signed-off-by: Borislav Petkov Signed-off-by: Ricardo Neri --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 0949f9c7e872..d762c81e62a8 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -844,7 +844,7 @@ efi_guidcmp (efi_guid_t left, efi_guid_t right) } static inline char * -efi_guid_unparse(efi_guid_t *guid, char *out) +efi_guid_to_str(efi_guid_t *guid, char *out) { sprintf(out, "%pUl", guid->b); return out; -- cgit v1.2.3 From e61f7d1c3c07a7e51036b0796749edb00deff845 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 8 Jan 2015 10:34:27 -0500 Subject: libata: Whitelist SSDs that are known to properly return zeroes after TRIM As defined, the DRAT (Deterministic Read After Trim) and RZAT (Return Zero After Trim) flags in the ATA Command Set are unreliable in the sense that they only define what happens if the device successfully executed the DSM TRIM command. TRIM is only advisory, however, and the device is free to silently ignore all or parts of the request. In practice this renders the DRAT and RZAT flags completely useless and because the results are unpredictable we decided to disable discard in MD for 3.18 to avoid the risk of data corruption. Hardware vendors in the real world obviously need better guarantees than what the standards bodies provide. Unfortuntely those guarantees are encoded in product requirements documents rather than somewhere we can key off of them programatically. So we are compelled to disabling discard_zeroes_data for all devices unless we explicitly have data to support whitelisting them. This patch whitelists SSDs from a few of the main vendors. None of the whitelists are based on written guarantees. They are purely based on empirical evidence collected from internal and external users that have tested or qualified these drives in RAID deployments. The whitelist is only meant as a starting point and is by no means comprehensive: - All intel SSD models except for 510 - Micron M5?0/M600 - Samsung SSDs - Seagate SSDs Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 2d182413b1db..f2b440e44fd7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -422,6 +422,7 @@ enum { ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ + ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From ad26aa6c60974acf3228ed0ade97ba5793093dbe Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 8 Jan 2015 11:04:07 +0900 Subject: regulator: s2mps11: Fix wrong calculation of register offset This patch adds missing registers('BUCK7_SW' & 'LDO29_CTRL'). Since BUCK7 has 1 more register (BUCK7_SW) than others, register offset should be added one more for which has bigger address than BUCK7 registers. Fixes: 76b9840b24ae04(regulator: s2mps11: Add support S2MPS13 regulator device) Signed-off-by: Jonghwa Lee Signed-off-by: Chanwoo Choi Signed-off-by: Mark Brown Cc: --- include/linux/mfd/samsung/s2mps13.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/s2mps13.h b/include/linux/mfd/samsung/s2mps13.h index ce5dda8958fe..b1fd675fa36f 100644 --- a/include/linux/mfd/samsung/s2mps13.h +++ b/include/linux/mfd/samsung/s2mps13.h @@ -59,6 +59,7 @@ enum s2mps13_reg { S2MPS13_REG_B6CTRL, S2MPS13_REG_B6OUT, S2MPS13_REG_B7CTRL, + S2MPS13_REG_B7SW, S2MPS13_REG_B7OUT, S2MPS13_REG_B8CTRL, S2MPS13_REG_B8OUT, @@ -102,6 +103,7 @@ enum s2mps13_reg { S2MPS13_REG_L26CTRL, S2MPS13_REG_L27CTRL, S2MPS13_REG_L28CTRL, + S2MPS13_REG_L29CTRL, S2MPS13_REG_L30CTRL, S2MPS13_REG_L31CTRL, S2MPS13_REG_L32CTRL, -- cgit v1.2.3 From bfa21a0dfe6915dc85953b5d40ea9dae5fdf205f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 5 Jan 2015 12:48:42 +0100 Subject: regulator: Allow parsing custom properties when using simplified DT parsing When drivers use simplified DT parsing method (they provide 'regulator_desc.of_match') they still may want to parse custom properties for some of the regulators. For example some of the regulators support GPIO enable control. Add a driver-supplied callback for such case. This way the regulator core parses common bindings offloading a lot of code from drivers and still custom properties may be used. The callback, called for each parsed regulator, may modify the 'regulator_config' initially passed to regulator_register(). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 5f1e9ca47417..d4ad5b5a02bb 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -21,6 +21,7 @@ struct regmap; struct regulator_dev; +struct regulator_config; struct regulator_init_data; struct regulator_enable_gpio; @@ -205,6 +206,15 @@ enum regulator_type { * @supply_name: Identifying the regulator supply * @of_match: Name used to identify regulator in DT. * @regulators_node: Name of node containing regulator definitions in DT. + * @of_parse_cb: Optional callback called only if of_match is present. + * Will be called for each regulator parsed from DT, during + * init_data parsing. + * The regulator_config passed as argument to the callback will + * be a copy of config passed to regulator_register, valid only + * for this particular call. Callback may freely change the + * config but it cannot store it for later usage. + * Callback should return 0 on success or negative ERRNO + * indicating failure. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. @@ -251,6 +261,9 @@ struct regulator_desc { const char *supply_name; const char *of_match; const char *regulators_node; + int (*of_parse_cb)(struct device_node *, + const struct regulator_desc *, + struct regulator_config *); int id; bool continuous_voltage_range; unsigned n_voltages; -- cgit v1.2.3 From db30485408326a6f466a843b291b23535f63eda0 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 7 Jan 2015 13:41:54 +0800 Subject: rhashtable: involve rhashtable_lookup_insert routine Involve a new function called rhashtable_lookup_insert() which makes lookup and insertion atomic under bucket lock protection, helping us avoid to introduce an extra lock when we search and insert an object into hash table. Signed-off-by: Ying Xue Signed-off-by: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index de1459c74c4d..73c913f31574 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -168,6 +168,7 @@ int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); +bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); void rhashtable_destroy(struct rhashtable *ht); -- cgit v1.2.3 From c0c09bfdc4150b3918526660768585cd477adf35 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 7 Jan 2015 13:41:56 +0800 Subject: rhashtable: avoid unnecessary wakeup for worker queue Move condition statements of verifying whether hash table size exceeds its maximum threshold or reaches its minimum threshold from resizing functions to resizing decision functions, avoiding unnecessary wakeup for worker queue thread. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 73c913f31574..326acd8c2e9f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -113,7 +113,7 @@ struct rhashtable { struct bucket_table __rcu *tbl; struct bucket_table __rcu *future_tbl; atomic_t nelems; - size_t shift; + atomic_t shift; struct rhashtable_params p; struct delayed_work run_work; struct mutex mutex; -- cgit v1.2.3 From e5a7a72cd51a585b8f1a1e299bf88fff44b94440 Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 9 Jan 2015 09:57:33 +0800 Subject: regulator: pfuze100-regulator: add pfuze3000 support Add pfuze3000 chip support. Signed-off-by: Robin Gong Signed-off-by: Mark Brown --- include/linux/regulator/pfuze100.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h index 364f7a7c43db..70c6c66c5bcf 100644 --- a/include/linux/regulator/pfuze100.h +++ b/include/linux/regulator/pfuze100.h @@ -49,6 +49,20 @@ #define PFUZE200_VGEN5 11 #define PFUZE200_VGEN6 12 +#define PFUZE3000_SW1A 0 +#define PFUZE3000_SW1B 1 +#define PFUZE3000_SW2 2 +#define PFUZE3000_SW3 3 +#define PFUZE3000_SWBST 4 +#define PFUZE3000_VSNVS 5 +#define PFUZE3000_VREFDDR 6 +#define PFUZE3000_VLDO1 7 +#define PFUZE3000_VLDO2 8 +#define PFUZE3000_VCCSD 9 +#define PFUZE3000_V33 10 +#define PFUZE3000_VLDO3 11 +#define PFUZE3000_VLDO4 12 + struct regulator_init_data; struct pfuze_regulator_platform_data { -- cgit v1.2.3 From ed09dcc8bd7fe0991af7737e675996cbd022f38f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Dec 2014 14:46:14 -0800 Subject: ses: close potential registration race The slot and address fields have a small window of instability when userspace can read them before initialization. Separate enclosure_component allocation from registration. Signed-off-by: Dan Williams Signed-off-by: Song Liu Reviewed-by: Jens Axboe Cc: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/enclosure.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 9a33c5f7e126..a835d335b924 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -120,8 +120,9 @@ enclosure_register(struct device *, const char *, int, struct enclosure_component_callbacks *); void enclosure_unregister(struct enclosure_device *); struct enclosure_component * -enclosure_component_register(struct enclosure_device *, unsigned int, - enum enclosure_component_type, const char *); +enclosure_component_alloc(struct enclosure_device *, unsigned int, + enum enclosure_component_type, const char *); +int enclosure_component_register(struct enclosure_component *); int enclosure_add_device(struct enclosure_device *enclosure, int component, struct device *dev); int enclosure_remove_device(struct enclosure_device *, struct device *); -- cgit v1.2.3 From 967f7bab0eaaa74d7d01a56d45aa309f78fb87dd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Dec 2014 14:46:16 -0800 Subject: ses: add enclosure logical id Export the NAA logical id for the enclosure. This is optionally available from the sas_transport_class, but it is really a property of the enclosure. Signed-off-by: Dan Williams Signed-off-by: Song Liu Reviewed-by: Jens Axboe Cc: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/enclosure.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index a835d335b924..807622b252a4 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -79,6 +79,7 @@ struct enclosure_component_callbacks { int (*set_locate)(struct enclosure_device *, struct enclosure_component *, enum enclosure_component_setting); + int (*show_id)(struct enclosure_device *, char *buf); }; -- cgit v1.2.3 From 921ce7f5786052749a22a75780f5ce1a456bcdc6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Dec 2014 14:46:17 -0800 Subject: ses: add reliable slot attribute The name provided by firmware is in a vendor specific format, publish the slot number to have a reliable mechanism for identifying slots across firmware implementations. If the enclosure does not provide a slot number fallback to the component number which is guaranteed unique, and usually mirrors the slot number. Cleaned up the unused ses_component.desc in the process. Signed-off-by: Dan Williams Signed-off-by: Song Liu Reviewed-by: Jens Axboe Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/enclosure.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 807622b252a4..0f826c14a337 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -92,6 +92,7 @@ struct enclosure_component { int fault; int active; int locate; + int slot; enum enclosure_status status; }; -- cgit v1.2.3 From 08024885a2a3ed432716e9d50046a620a5b2df05 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 30 Dec 2014 14:46:18 -0800 Subject: ses: Add power_status to SES device slot Add power_status to SES device slot, so we can power on/off the HDDs behind the enclosure. Check firmware status in ses_set_* before sending control pages to firmware. Signed-off-by: Song Liu Acked-by: Dan Williams Reviewed-by: Jens Axboe Cc: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/enclosure.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 0f826c14a337..7be22da321f3 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -79,6 +79,11 @@ struct enclosure_component_callbacks { int (*set_locate)(struct enclosure_device *, struct enclosure_component *, enum enclosure_component_setting); + void (*get_power_status)(struct enclosure_device *, + struct enclosure_component *); + int (*set_power_status)(struct enclosure_device *, + struct enclosure_component *, + int); int (*show_id)(struct enclosure_device *, char *buf); }; @@ -94,6 +99,7 @@ struct enclosure_component { int locate; int slot; enum enclosure_status status; + int power_status; }; struct enclosure_device { -- cgit v1.2.3 From 9733e4f0a973a354034f5dd603b4142a3095c85f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Nov 2014 12:49:13 -0800 Subject: rcu: Make _batches_completed() functions return unsigned long Long ago, the various ->completed fields were of type long, but now are unsigned long due to signed-integer-overflow concerns. However, the various _batches_completed() functions remained of type long, even though their only purpose in life is to return the corresponding ->completed field. This patch cleans this up by changing these functions' return types to unsigned long. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 4 ++-- include/linux/rcutree.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 0e5366200154..91f7e4c37800 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -94,7 +94,7 @@ static inline void rcu_virt_note_context_switch(int cpu) /* * Return the number of grace periods. */ -static inline long rcu_batches_completed(void) +static inline unsigned long rcu_batches_completed(void) { return 0; } @@ -102,7 +102,7 @@ static inline long rcu_batches_completed(void) /* * Return the number of bottom-half grace periods. */ -static inline long rcu_batches_completed_bh(void) +static inline unsigned long rcu_batches_completed_bh(void) { return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 52953790dcca..9885bfb6b123 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -81,9 +81,9 @@ void cond_synchronize_rcu(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; -long rcu_batches_completed(void); -long rcu_batches_completed_bh(void); -long rcu_batches_completed_sched(void); +unsigned long rcu_batches_completed(void); +unsigned long rcu_batches_completed_bh(void); +unsigned long rcu_batches_completed_sched(void); void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); -- cgit v1.2.3 From c1fe9cde4ae904fffb5b4d975d0a37e99136ff50 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Nov 2014 15:45:27 -0800 Subject: rcu: Provide rcu_batches_completed_sched() for TINY_RCU A bug in rcutorture has caused it to ignore completed batches. In preparation for fixing that bug, this commit provides TINY_RCU with the required rcu_batches_completed_sched(). Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 91f7e4c37800..1ce2d6b8f0c3 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -107,6 +107,14 @@ static inline unsigned long rcu_batches_completed_bh(void) return 0; } +/* + * Return the number of sched grace periods. + */ +static inline unsigned long rcu_batches_completed_sched(void) +{ + return 0; +} + static inline void rcu_force_quiescent_state(void) { } -- cgit v1.2.3 From 917963d0b30f9c4153c372c165178501d97b6b55 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Nov 2014 17:10:16 -0800 Subject: rcutorture: Check from beginning to end of grace period Currently, rcutorture's Reader Batch checks measure from the end of the previous grace period to the end of the current one. This commit tightens up these checks by measuring from the start and end of the same grace period. This involves adding rcu_batches_started() and friends corresponding to the existing rcu_batches_completed() and friends. We leave SRCU alone for the moment, as it does not yet have a way of tracking both ends of its grace periods. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 30 +++++++++++++++++++++++++++--- include/linux/rcutree.h | 3 +++ 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 1ce2d6b8f0c3..984192160e9b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -92,7 +92,31 @@ static inline void rcu_virt_note_context_switch(int cpu) } /* - * Return the number of grace periods. + * Return the number of grace periods started. + */ +static inline unsigned long rcu_batches_started(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods started. + */ +static inline unsigned long rcu_batches_started_bh(void) +{ + return 0; +} + +/* + * Return the number of sched grace periods started. + */ +static inline unsigned long rcu_batches_started_sched(void) +{ + return 0; +} + +/* + * Return the number of grace periods completed. */ static inline unsigned long rcu_batches_completed(void) { @@ -100,7 +124,7 @@ static inline unsigned long rcu_batches_completed(void) } /* - * Return the number of bottom-half grace periods. + * Return the number of bottom-half grace periods completed. */ static inline unsigned long rcu_batches_completed_bh(void) { @@ -108,7 +132,7 @@ static inline unsigned long rcu_batches_completed_bh(void) } /* - * Return the number of sched grace periods. + * Return the number of sched grace periods completed. */ static inline unsigned long rcu_batches_completed_sched(void) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9885bfb6b123..c0dd124e69ec 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -81,6 +81,9 @@ void cond_synchronize_rcu(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; +unsigned long rcu_batches_started(void); +unsigned long rcu_batches_started_bh(void); +unsigned long rcu_batches_started_sched(void); unsigned long rcu_batches_completed(void); unsigned long rcu_batches_completed_bh(void); unsigned long rcu_batches_completed_sched(void); -- cgit v1.2.3 From 31c89c959667194350f496947b576c149503ce98 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Fri, 9 Jan 2015 07:43:45 -0800 Subject: pinctrl: pinconf-generic: Infer map type from DT property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the new 'groups' property, the DT parser can infer the map type from the fact whether 'pins' or 'groups' is used to specify the pin group to work on. To maintain backwards compatibitliy with current usage of the DT binding, this is only done when PIN_MAP_TYPE_INVALID is passed to the parsing function as type. Also, a new helper 'pinconf_generic_dt_node_to_map_all()' is introduced, which can be used by drivers as generic callback for dt_node_to_map() to leverage the new feature. Changes since v2: - rename dt_pin_specifier to subnode_target_type - add additional comment in header file explaining passing an invalid map type - mention map_all() helper in commit message Changes since RFC v2: - none Signed-off-by: Soren Brinkmann Tested-by: Andreas Färber Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index d578a60eff23..83c89f5ab705 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -174,6 +174,17 @@ static inline int pinconf_generic_dt_node_to_map_pin( PIN_MAP_TYPE_CONFIGS_PIN); } +static inline int pinconf_generic_dt_node_to_map_all( + struct pinctrl_dev *pctldev, struct device_node *np_config, + struct pinctrl_map **map, unsigned *num_maps) +{ + /* + * passing the type as PIN_MAP_TYPE_INVALID causes the underlying parser + * to infer the map type from the DT properties used. + */ + return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, + PIN_MAP_TYPE_INVALID); +} #endif #endif /* CONFIG_GENERIC_PINCONF */ -- cgit v1.2.3 From dd4d01f7bad886c22687224bc7070b87de8deb51 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Fri, 9 Jan 2015 07:43:46 -0800 Subject: pinctrl: pinconf-generic: Allow driver to specify DT params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additionally to the generic DT parameters, allow drivers to provide driver-specific DT parameters to be used with the generic parser infrastructure. To achieve this 'struct pinctrl_desc' is extended to pass custom pinconf option to the core. In order to pass this kind of information, the related data structures - 'struct pinconf_generic_dt_params', 'pin_config_item' - are moved from pinconf internals to the pinconf-generic header. Additionally pinconfg-generic is refactored to not only iterate over the generic pinconf parameters but also take the parameters into account that are provided through the driver's 'struct pinctrl_desc'. In particular 'pinconf_generic_parse_dt_config()' and 'pinconf_generic_dump' helpers are split into two parts each. In order to have a more generic helper that can be used to process the generic parameters as well as the driver-specific ones. v2: - fix typo - add missing documentation for @conf_items member in struct - rebase to pinctrl/devel: conflict in abx500 - rename _pinconf_generic_dump() to pinconf_generic_dump_one() - removed '_' from _parse_dt_cfg() - removed BUG_ONs, error condition is handled in if statements - removed pinconf_generic_dump_group() & pinconf_generic_dump_pin helpers - fixed up corresponding call sites - renamed pinconf_generic_dump() to pinconf_generic_dump_pins() - added kernel-doc to pinconf_generic_dump_pins() - add kernel-doc - more verbose commit message Signed-off-by: Soren Brinkmann Tested-by: Andreas Färber Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 18 ++++++++++++++++++ include/linux/pinctrl/pinctrl.h | 9 +++++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 83c89f5ab705..342409f7f3ec 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -115,6 +115,18 @@ enum pin_config_param { PIN_CONFIG_END = 0x7FFF, }; +#ifdef CONFIG_DEBUG_FS +#define PCONFDUMP(a, b, c, d) { .param = a, .display = b, .format = c, \ + .has_arg = d } + +struct pin_config_item { + const enum pin_config_param param; + const char * const display; + const char * const format; + bool has_arg; +}; +#endif /* CONFIG_DEBUG_FS */ + /* * Helpful configuration macro to be used in tables etc. */ @@ -150,6 +162,12 @@ static inline unsigned long pinconf_to_config_packed(enum pin_config_param param struct pinctrl_dev; struct pinctrl_map; +struct pinconf_generic_dt_params { + const char * const property; + enum pin_config_param param; + u32 default_value; +}; + int pinconf_generic_dt_subnode_to_map(struct pinctrl_dev *pctldev, struct device_node *np, struct pinctrl_map **map, unsigned *reserved_maps, unsigned *num_maps, diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index cc8e1aff0e28..c58b3e11ba8e 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -24,6 +24,7 @@ struct pinctrl_dev; struct pinctrl_map; struct pinmux_ops; struct pinconf_ops; +struct pin_config_item; struct gpio_chip; struct device_node; @@ -117,6 +118,9 @@ struct pinctrl_ops { * @confops: pin config operations vtable, if you support pin configuration in * your driver * @owner: module providing the pin controller, used for refcounting + * @num_dt_params: Number of driver-specific DT parameters + * @params: List of DT parameters + * @conf_items: Information how to print @params in debugfs */ struct pinctrl_desc { const char *name; @@ -126,6 +130,11 @@ struct pinctrl_desc { const struct pinmux_ops *pmxops; const struct pinconf_ops *confops; struct module *owner; +#if defined(CONFIG_GENERIC_PINCONF) && defined(CONFIG_OF) + unsigned int num_dt_params; + const struct pinconf_generic_dt_params *params; + const struct pin_config_item *conf_items; +#endif }; /* External interface to pin controller */ -- cgit v1.2.3 From 7bb68410ef22067b08fd52887875b8f337f89dcc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Oct 2014 15:04:15 +0200 Subject: efi: split off remapping code from efi_config_init() Split of the remapping code from efi_config_init() so that the caller can perform its own remapping. This is necessary to correctly handle virtually remapped UEFI memory regions under kexec, as efi.systab will have been updated to a virtual address. Acked-by: Matt Fleming Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 0238d612750e..5ffe5115951f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -875,6 +875,8 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int efi_config_init(efi_config_table_type_t *arch_tables); +extern int efi_config_parse_tables(void *config_tables, int count, int sz, + efi_config_table_type_t *arch_tables); extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); -- cgit v1.2.3 From cbf6ab52add20b845f903decc973afbd5463c527 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 5 Jan 2015 19:29:32 +0800 Subject: kprobes: Pass the original kprobe for preparing optimized kprobe Pass the original kprobe for preparing an optimized kprobe arch-dep part, since for some architecture (e.g. ARM32) requires the information in original kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Wang Nan Signed-off-by: Jon Medhurst --- include/linux/kprobes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5297f9fa0ef2..1ab54754a86d 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -308,7 +308,8 @@ struct optimized_kprobe { /* Architecture dependent functions for direct jump optimization */ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn); extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); -extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); +extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, + struct kprobe *orig); extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); extern void arch_optimize_kprobes(struct list_head *oplist); extern void arch_unoptimize_kprobes(struct list_head *oplist, -- cgit v1.2.3 From 7a868d1e9ab3c534c5ad44e3e5dc46753a1e5636 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:22 +0800 Subject: rhashtable: involve rhashtable_lookup_compare_insert routine Introduce a new function called rhashtable_lookup_compare_insert() which is very similar to rhashtable_lookup_insert(). But the former makes use of users' given compare function to look for an object, and then inserts it into hash table if found. As the entire process of search and insertion is under protection of per bucket lock, this can help users to avoid the involvement of extra lock. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 326acd8c2e9f..7b9bd77ed684 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -168,7 +168,12 @@ int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); + bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg); void rhashtable_destroy(struct rhashtable *ht); -- cgit v1.2.3 From 6f73d3b13dc5e16ae06025cd1b12a36b2857caa2 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:24 +0800 Subject: rhashtable: add a note for grow and shrink decision functions As commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") moves condition statements of verifying whether hash table size exceeds its maximum threshold or reaches its minimum threshold from resizing functions to resizing decision functions, we should add a note in rhashtable.h to indicate the implementation of what the grow and shrink decision function must enforce min/max shift, otherwise, it's failed to take min/max shift's set watermarks into effect. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7b9bd77ed684..9570832ab07c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -79,6 +79,10 @@ struct rhashtable; * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand * @shrink_decision: If defined, may return true if table should shrink + * + * Note: when implementing the grow and shrink decision function, min/max + * shift must be enforced, otherwise, resizing watermarks they set may be + * useless. */ struct rhashtable_params { size_t nelem_hint; -- cgit v1.2.3 From df8a39defad46b83694ea6dd868d332976d62cc0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Jan 2015 17:13:44 +0100 Subject: net: rename vlan_tx_* helpers since "tx" is misleading there The same macros are used for rx as well. So rename it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 515a35e2a48a..bea465f24ebb 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -78,9 +78,9 @@ static inline bool is_vlan_dev(struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) -#define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) -#define vlan_tx_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) +#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +#define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats @@ -376,7 +376,7 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) { skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (likely(skb)) skb->vlan_tci = 0; return skb; @@ -393,7 +393,7 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) */ static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) skb = __vlan_hwaccel_push_inside(skb); return skb; } @@ -442,8 +442,8 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { - if (vlan_tx_tag_present(skb)) { - *vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + *vlan_tci = skb_vlan_tag_get(skb); return 0; } else { *vlan_tci = 0; @@ -480,7 +480,7 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { __be16 protocol = 0; - if (vlan_tx_tag_present(skb) || + if (skb_vlan_tag_present(skb) || skb->protocol != cpu_to_be16(ETH_P_8021Q)) protocol = skb->protocol; else { -- cgit v1.2.3 From f684e4ac9f4bae4e6ecff92eef9645a44764fc04 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Jan 2015 00:45:55 +0100 Subject: pinctrl: pinconf-generic: loose DT dependence New pin controllers such as ACPI-based may also have custom properties to parse, and should be able to use generic pin config. Let's make the code compile on !OF systems and rename members a bit to underscore it is custom parameters and not necessarily DT parameters. This fixes a build regression for x86_64 on the zeroday kernel builds. Reported-by: kbuild test robot Reviewed-and-tested-by: Soren Brinkmann Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 2 +- include/linux/pinctrl/pinctrl.h | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 342409f7f3ec..fe65962b264f 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -162,7 +162,7 @@ static inline unsigned long pinconf_to_config_packed(enum pin_config_param param struct pinctrl_dev; struct pinctrl_map; -struct pinconf_generic_dt_params { +struct pinconf_generic_params { const char * const property; enum pin_config_param param; u32 default_value; diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index c58b3e11ba8e..66e4697516de 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -118,9 +118,12 @@ struct pinctrl_ops { * @confops: pin config operations vtable, if you support pin configuration in * your driver * @owner: module providing the pin controller, used for refcounting - * @num_dt_params: Number of driver-specific DT parameters - * @params: List of DT parameters - * @conf_items: Information how to print @params in debugfs + * @num_custom_params: Number of driver-specific custom parameters to be parsed + * from the hardware description + * @custom_params: List of driver_specific custom parameters to be parsed from + * the hardware description + * @custom_conf_items: Information how to print @params in debugfs, must be + * the same size as the @custom_params, i.e. @num_custom_params */ struct pinctrl_desc { const char *name; @@ -130,10 +133,10 @@ struct pinctrl_desc { const struct pinmux_ops *pmxops; const struct pinconf_ops *confops; struct module *owner; -#if defined(CONFIG_GENERIC_PINCONF) && defined(CONFIG_OF) - unsigned int num_dt_params; - const struct pinconf_generic_dt_params *params; - const struct pin_config_item *conf_items; +#ifdef CONFIG_GENERIC_PINCONF + unsigned int num_custom_params; + const struct pinconf_generic_params *custom_params; + const struct pin_config_item *custom_conf_items; #endif }; -- cgit v1.2.3 From 5a7d2efdd93f6c4bb6cd3d5df3d2f5611c9b87ac Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 16 Dec 2014 10:59:17 +0100 Subject: pinctrl: consumer: use correct retval for placeholder functions These functions are supposed to return an error pointer, not NULL. Signed-off-by: Wolfram Sang Signed-off-by: Linus Walleij --- include/linux/pinctrl/consumer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 18eccefea06e..72c0415d6c21 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -82,7 +82,7 @@ static inline int pinctrl_gpio_direction_output(unsigned gpio) static inline struct pinctrl * __must_check pinctrl_get(struct device *dev) { - return NULL; + return ERR_PTR(-ENOSYS); } static inline void pinctrl_put(struct pinctrl *p) @@ -93,7 +93,7 @@ static inline struct pinctrl_state * __must_check pinctrl_lookup_state( struct pinctrl *p, const char *name) { - return NULL; + return ERR_PTR(-ENOSYS); } static inline int pinctrl_select_state(struct pinctrl *p, @@ -104,7 +104,7 @@ static inline int pinctrl_select_state(struct pinctrl *p, static inline struct pinctrl * __must_check devm_pinctrl_get(struct device *dev) { - return NULL; + return ERR_PTR(-ENOSYS); } static inline void devm_pinctrl_put(struct pinctrl *p) -- cgit v1.2.3 From d84b6728c54dcf73bcef3e3f7cf6767e2d224e39 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 6 Jan 2015 11:45:07 -0800 Subject: locking/mcs: Better differentiate between MCS variants We have two flavors of the MCS spinlock: standard and cancelable (OSQ). While each one is independent of the other, we currently mix and match them. This patch: - Moves the OSQ code out of mcs_spinlock.h (which only deals with the traditional version) into include/linux/osq_lock.h. No unnecessary code is added to the more global header file, anything locks that make use of OSQ must include it anyway. - Renames mcs_spinlock.c to osq_lock.c. This file only contains osq code. - Introduces a CONFIG_LOCK_SPIN_ON_OWNER in order to only build osq_lock if there is support for it. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: "Paul E. McKenney" Cc: Jason Low Cc: Linus Torvalds Cc: Mikulas Patocka Cc: Waiman Long Link: http://lkml.kernel.org/r/1420573509-24774-5-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/osq_lock.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 90230d5811c5..3a6490e81b28 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -5,8 +5,11 @@ * An MCS like lock especially tailored for optimistic spinning for sleeping * lock implementations (mutex, rwsem, etc). */ - -#define OSQ_UNLOCKED_VAL (0) +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; + int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # + 1 value */ +}; struct optimistic_spin_queue { /* @@ -16,6 +19,8 @@ struct optimistic_spin_queue { atomic_t tail; }; +#define OSQ_UNLOCKED_VAL (0) + /* Init macro and function. */ #define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } @@ -24,4 +29,7 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock) atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); } +extern bool osq_lock(struct optimistic_spin_queue *lock); +extern void osq_unlock(struct optimistic_spin_queue *lock); + #endif -- cgit v1.2.3 From 86038c5ea81b519a8a1fcfcd5e4599aab0cdd119 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 16 Dec 2014 12:47:34 +0100 Subject: perf: Avoid horrible stack usage Both Linus (most recent) and Steve (a while ago) reported that perf related callbacks have massive stack bloat. The problem is that software events need a pt_regs in order to properly report the event location and unwind stack. And because we could not assume one was present we allocated one on stack and filled it with minimal bits required for operation. Now, pt_regs is quite large, so this is undesirable. Furthermore it turns out that most sites actually have a pt_regs pointer available, making this even more onerous, as the stack space is pointless waste. This patch addresses the problem by observing that software events have well defined nesting semantics, therefore we can use static per-cpu storage instead of on-stack. Linus made the further observation that all but the scheduler callers of perf_sw_event() have a pt_regs available, so we change the regular perf_sw_event() to require a valid pt_regs (where it used to be optional) and add perf_sw_event_sched() for the scheduler. We have a scheduler specific call instead of a more generic _noregs() like construct because we can assume non-recursion from the scheduler and thereby simplify the code further (_noregs would have to put the recursion context call inline in order to assertain which __perf_regs element to use). One last note on the implementation of perf_trace_buf_prepare(); we allow .regs = NULL for those cases where we already have a pt_regs pointer available and do not need another. Reported-by: Linus Torvalds Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Javi Merino Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Petr Mladek Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vaibhav Nagarnaik Link: http://lkml.kernel.org/r/20141216115041.GW3337@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/linux/perf_event.h | 28 +++++++++++++++++++++------- 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0bebb5c348b8..d36f68b08acc 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -595,7 +595,7 @@ extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp); + struct pt_regs **regs, int *rctxp); static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f7a61ca4b39..3a7bd80b4db8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -665,6 +665,7 @@ static inline int is_software_event(struct perf_event *event) extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs @@ -689,14 +690,25 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) static __always_inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { - struct pt_regs hot_regs; + if (static_key_false(&perf_swevent_enabled[event_id])) + __perf_sw_event(event_id, nr, regs, addr); +} + +DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]); +/* + * 'Special' version for the scheduler, it hard assumes no recursion, + * which is guaranteed by us not actually scheduling inside other swevents + * because those disable preemption. + */ +static __always_inline void +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) +{ if (static_key_false(&perf_swevent_enabled[event_id])) { - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; - } - __perf_sw_event(event_id, nr, regs, addr); + struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); + + perf_fetch_caller_regs(regs); + ___perf_sw_event(event_id, nr, regs, addr); } } @@ -712,7 +724,7 @@ static inline void perf_event_task_sched_in(struct task_struct *prev, static inline void perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next) { - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); + perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); @@ -823,6 +835,8 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh) static inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { } +static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks -- cgit v1.2.3 From a2b12f3c7ac1ea43ae646db74faf0b56c2bba563 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 12 Jan 2015 17:00:37 -0800 Subject: udp: pass udp_offload struct to UDP gro callbacks This patch introduces udp_offload_callbacks which has the same GRO functions (but not a GSO function) as offload_callbacks, except there is an argument to a udp_offload struct passed to gro_receive and gro_complete functions. This additional argument can be used to retrieve the per port structure of the encapsulation for use in gro processing (mostly by doing container_of on the structure). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 679e6e90aa4c..47921c291dd6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1969,7 +1969,7 @@ struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); + struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); }; @@ -1979,10 +1979,21 @@ struct packet_offload { struct list_head list; }; +struct udp_offload; + +struct udp_offload_callbacks { + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff); + int (*gro_complete)(struct sk_buff *skb, + int nhoff, + struct udp_offload *uoff); +}; + struct udp_offload { __be16 port; u8 ipproto; - struct offload_callbacks callbacks; + struct udp_offload_callbacks callbacks; }; /* often modified stats are per cpu, other are shared (netdev->stats) */ -- cgit v1.2.3 From c38fda3fe8163898f538a45d3c1419e6870625ed Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 14 Jan 2015 22:59:13 -0800 Subject: jbd: drop jbd_ENOSYS debug A quick search shows that there are no users, drop the macro for both jbd and jbd2. Signed-off-by: Davidlohr Bueso Cc: Jan Kara Signed-off-by: Jan Kara --- include/linux/jbd.h | 9 --------- include/linux/jbd2.h | 9 --------- 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 31229e0be90b..d32615280be9 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -956,15 +956,6 @@ void __log_wait_for_space(journal_t *journal); extern void __journal_drop_transaction(journal_t *, transaction_t *); extern int cleanup_journal_tail(journal_t *); -/* Debugging code only: */ - -#define jbd_ENOSYS() \ -do { \ - printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \ - current->state = TASK_UNINTERRUPTIBLE; \ - schedule(); \ -} while (1) - /* * is_journal_abort * diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 704b9a599b26..20e7f78041c8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1251,15 +1251,6 @@ void __jbd2_log_wait_for_space(journal_t *journal); extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); -/* Debugging code only: */ - -#define jbd_ENOSYS() \ -do { \ - printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \ - current->state = TASK_UNINTERRUPTIBLE; \ - schedule(); \ -} while (1) - /* * is_journal_abort * -- cgit v1.2.3 From 37715f556a0776356300391f8ac41ace91bea447 Mon Sep 17 00:00:00 2001 From: David Ung Date: Tue, 13 Jan 2015 19:04:25 -0800 Subject: video: fbdev: Add additional vesa modes Add high resolution modes to vesa_modes struct. Signed-off-by: David Ung Signed-off-by: Tomi Valkeinen --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 09bb7a18d287..882dbd1a87b7 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -726,7 +726,7 @@ extern int fb_videomode_from_videomode(const struct videomode *vm, struct fb_videomode *fbmode); /* drivers/video/modedb.c */ -#define VESA_MODEDB_SIZE 34 +#define VESA_MODEDB_SIZE 43 extern void fb_var_to_videomode(struct fb_videomode *mode, const struct fb_var_screeninfo *var); extern void fb_videomode_to_var(struct fb_var_screeninfo *var, -- cgit v1.2.3 From 8f5ee77bb8d162abe28ff8cd56f36e825d143207 Mon Sep 17 00:00:00 2001 From: David Ung Date: Tue, 13 Jan 2015 19:04:26 -0800 Subject: video: fbdev: Check Standard Timing against DMT Add the VESA Display Monitor Timing (DMT) table. During parsing of Standard Timings, it compare the 2 byte STD code with DMT to see what the VESA mode should be. If there is no entry in the vesa_modes table or no match found, it fallsback to the GTF timings. Signed-off-by: David Ung Signed-off-by: Tomi Valkeinen --- include/linux/fb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 882dbd1a87b7..043f3283b71c 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -727,6 +727,8 @@ extern int fb_videomode_from_videomode(const struct videomode *vm, /* drivers/video/modedb.c */ #define VESA_MODEDB_SIZE 43 +#define DMT_SIZE 0x50 + extern void fb_var_to_videomode(struct fb_videomode *mode, const struct fb_var_screeninfo *var); extern void fb_videomode_to_var(struct fb_var_screeninfo *var, @@ -777,9 +779,17 @@ struct fb_videomode { u32 flag; }; +struct dmt_videomode { + u32 dmt_id; + u32 std_2byte_code; + u32 cvt_3byte_code; + const struct fb_videomode *mode; +}; + extern const char *fb_mode_option; extern const struct fb_videomode vesa_modes[]; extern const struct fb_videomode cea_modes[64]; +extern const struct dmt_videomode dmt_modes[]; struct fb_modelist { struct list_head list; -- cgit v1.2.3 From d621e8bae5ac9c67de4de90c5cded12adc8ee1e1 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Wed, 17 Dec 2014 16:51:13 +0100 Subject: gpio/gpiolib-of: Create of_mm_gpiochip_remove Create counterpart of of_mm_gpiochip_add(). This way the modules that can be removable do not duplicate the cleanup code. Suggested-by: Alexandre Courbot Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Linus Walleij --- include/linux/of_gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 38fc05036015..69dbe312b11b 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -52,6 +52,7 @@ extern int of_get_named_gpio_flags(struct device_node *np, extern int of_mm_gpiochip_add(struct device_node *np, struct of_mm_gpio_chip *mm_gc); +extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc); extern void of_gpiochip_add(struct gpio_chip *gc); extern void of_gpiochip_remove(struct gpio_chip *gc); -- cgit v1.2.3 From 53e41f554a0cbad139ee5072bbb49b4951f680c2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 15 Dec 2014 10:39:47 +0100 Subject: gpio: tc3589x: get rid of platform data This device is only used from the device tree, and the startup() and remove() callbacks are not used anywhere in the kernel, so retire them and the pdata altogether. Cc: Samuel Ortiz Acked-by: Lee Jones Signed-off-by: Linus Walleij --- include/linux/mfd/tc3589x.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tc3589x.h b/include/linux/mfd/tc3589x.h index e1c12d84c26a..c203c9c56776 100644 --- a/include/linux/mfd/tc3589x.h +++ b/include/linux/mfd/tc3589x.h @@ -162,25 +162,13 @@ struct tc3589x_keypad_platform_data { bool no_autorepeat; }; -/** - * struct tc3589x_gpio_platform_data - TC3589x GPIO platform data - * @setup: callback for board-specific initialization - * @remove: callback for board-specific teardown - */ -struct tc3589x_gpio_platform_data { - void (*setup)(struct tc3589x *tc3589x, unsigned gpio_base); - void (*remove)(struct tc3589x *tc3589x, unsigned gpio_base); -}; - /** * struct tc3589x_platform_data - TC3589x platform data * @block: bitmask of blocks to enable (use TC3589x_BLOCK_*) - * @gpio: GPIO-specific platform data * @keypad: keypad-specific platform data */ struct tc3589x_platform_data { unsigned int block; - struct tc3589x_gpio_platform_data *gpio; const struct tc3589x_keypad_platform_data *keypad; }; -- cgit v1.2.3 From d34541bc48eb7d7cb2ead5ff0284acf65af96f17 Mon Sep 17 00:00:00 2001 From: Olliver Schinagl Date: Wed, 7 Jan 2015 09:44:57 +0100 Subject: gpio: Make the vararg hacks not pass magic values Right now, in consumer.h, there's some vararg hacks that pass 0 as the flags. What actually is passed however is GPIOD_ASIS, which naturally is also 0. Using the define/enum rather then the magic 0 makes it the define more readable to a passer by. Signed-off-by: Olliver Schinagl Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index fd85cb120ee0..45afc2dee560 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -340,31 +340,32 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) * etc. */ #define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags) -#define gpiod_get(varargs...) __gpiod_get(varargs, 0) +#define gpiod_get(varargs...) __gpiod_get(varargs, GPIOD_ASIS) #define __gpiod_get_index(dev, con_id, index, flags, ...) \ __gpiod_get_index(dev, con_id, index, flags) -#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0) +#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, GPIOD_ASIS) #define __gpiod_get_optional(dev, con_id, flags, ...) \ __gpiod_get_optional(dev, con_id, flags) -#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0) +#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, GPIOD_ASIS) #define __gpiod_get_index_optional(dev, con_id, index, flags, ...) \ __gpiod_get_index_optional(dev, con_id, index, flags) #define gpiod_get_index_optional(varargs...) \ - __gpiod_get_index_optional(varargs, 0) + __gpiod_get_index_optional(varargs, GPIOD_ASIS) #define __devm_gpiod_get(dev, con_id, flags, ...) \ __devm_gpiod_get(dev, con_id, flags) -#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, 0) +#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, GPIOD_ASIS) #define __devm_gpiod_get_index(dev, con_id, index, flags, ...) \ __devm_gpiod_get_index(dev, con_id, index, flags) -#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0) +#define devm_gpiod_get_index(varargs...) \ + __devm_gpiod_get_index(varargs, GPIOD_ASIS) #define __devm_gpiod_get_optional(dev, con_id, flags, ...) \ __devm_gpiod_get_optional(dev, con_id, flags) #define devm_gpiod_get_optional(varargs...) \ - __devm_gpiod_get_optional(varargs, 0) + __devm_gpiod_get_optional(varargs, GPIOD_ASIS) #define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...) \ __devm_gpiod_get_index_optional(dev, con_id, index, flags) #define devm_gpiod_get_index_optional(varargs...) \ - __devm_gpiod_get_index_optional(varargs, 0) + __devm_gpiod_get_index_optional(varargs, GPIOD_ASIS) #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS) -- cgit v1.2.3 From 2397aa8b515f7bd77c8d5698170b6a98fdd6721c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:02:54 -0500 Subject: svcrdma: Clean up read chunk counting The byte_count argument is not used, and the function is called only from one place. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 975da754c778..2280325e4c88 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -178,8 +178,6 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQ_SIZE 4096 /* svc_rdma_marshal.c */ -extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, - int *, int *); extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, -- cgit v1.2.3 From e54524111f51eac1900cf91aca3d38a92a6b11c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:20 -0500 Subject: svcrdma: Plant reader function in struct svcxprt_rdma The RDMA reader function doesn't change once an svcxprt_rdma is instantiated. Instead of checking sc_devcap during every incoming RPC, set the reader function once when the connection is accepted. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 2280325e4c88..f161e309f25e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -150,6 +150,10 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; struct ib_mr *sc_phys_mr; /* MR for server memory */ + int (*sc_reader)(struct svcxprt_rdma *, + struct svc_rqst *, + struct svc_rdma_op_ctxt *, + int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ u32 sc_dma_lkey; /* local dma key */ unsigned int sc_frmr_pg_list_len; @@ -195,6 +199,12 @@ extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); +extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); +extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); /* svc_rdma_sendto.c */ extern int svc_rdma_sendto(struct svc_rqst *); -- cgit v1.2.3 From 0b056c224bea63060ce8a981e84193c93fac6f5d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:37 -0500 Subject: svcrdma: Support RDMA_NOMSG requests Currently the Linux server can not decode RDMA_NOMSG type requests. Operations whose length exceeds the fixed size of RDMA SEND buffers, like large NFSv4 CREATE(NF4LNK) operations, must be conveyed via RDMA_NOMSG. For an RDMA_MSG type request, the client sends the RPC/RDMA, RPC headers, and some or all of the NFS arguments via RDMA SEND. For an RDMA_NOMSG type request, the client sends just the RPC/RDMA header via RDMA SEND. The request's read list contains elements for the entire RPC message, including the RPC header. NFSD expects the RPC/RMDA header and RPC header to be contiguous in page zero of the XDR buffer. Add logic in the RDMA READ path to make the read list contents land where the server prefers, when the incoming message is a type RDMA_NOMSG message. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f161e309f25e..c343a94bc791 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -79,6 +79,7 @@ struct svc_rdma_op_ctxt { enum ib_wr_opcode wr_op; enum ib_wc_status wc_status; u32 byte_len; + u32 position; struct svcxprt_rdma *xprt; unsigned long flags; enum dma_data_direction direction; -- cgit v1.2.3 From 57699a40b4f2694d3ee63fd5e6465ec8f600b620 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 16 Jan 2015 11:13:09 +0800 Subject: rhashtable: Fix race in rhashtable_destroy() and use regular work_struct When we put our declared work task in the global workqueue with schedule_delayed_work(), its delay parameter is always zero. Therefore, we should define a regular work in rhashtable structure instead of a delayed work. By the way, we add a condition to check whether resizing functions are NULL before cancelling the work, avoiding to cancel an uninitialized work. Lastly, while we wait for all work items we submitted before to run to completion with cancel_delayed_work(), ht->mutex has been taken in rhashtable_destroy(). Moreover, cancel_delayed_work() doesn't return until all work items are accomplished, and when work items are scheduled, the work's function - rht_deferred_worker() will be called. However, as rht_deferred_worker() also needs to acquire the lock, deadlock might happen at the moment as the lock is already held before. So if the cancel work function is moved out of the lock covered scope, this will avoid the deadlock. Fixes: 97defe1 ("rhashtable: Per bucket locks & deferred expansion/shrinking") Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 9570832ab07c..a2562ed53ea3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -119,7 +119,7 @@ struct rhashtable { atomic_t nelems; atomic_t shift; struct rhashtable_params p; - struct delayed_work run_work; + struct work_struct run_work; struct mutex mutex; bool being_destroyed; }; -- cgit v1.2.3 From 5cd37193ce8539be1e6ef76be226f4bcc984e0f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 13 Dec 2014 20:32:04 -0800 Subject: rcu: Make cond_resched_rcu_qs() apply to normal RCU flavors Although cond_resched_rcu_qs() only applies to TASKS_RCU, it is used in places where it would be useful for it to apply to the normal RCU flavors, rcu_preempt, rcu_sched, and rcu_bh. This is especially the case for workloads that aggressively overload the system, particularly those that generate large numbers of RCU updates on systems running NO_HZ_FULL CPUs. This commit therefore communicates quiescent states from cond_resched_rcu_qs() to the normal RCU flavors. Note that it is unfortunately necessary to leave the old ->passed_quiesce mechanism in place to allow quiescent states that apply to only one flavor to be recorded. (Yes, we could decrement ->rcu_qs_ctr_snap in that case, but that is not so good for debugging of RCU internals.) In addition, if one of the RCU flavor's grace period has stalled, this will invoke rcu_momentary_dyntick_idle(), resulting in a heavy-weight quiescent state visible from other CPUs. Reported-by: Sasha Levin Reported-by: Dave Jones Signed-off-by: Paul E. McKenney [ paulmck: Merge commit from Sasha Levin fixing a bug where __this_cpu() was used in preemptible code. ] --- include/linux/rcupdate.h | 3 ++- include/linux/rcutiny.h | 5 ++++- include/linux/rcutree.h | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ed4f5939a452..468228750299 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -331,12 +331,13 @@ static inline void rcu_init_nohz(void) extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ + rcu_all_qs(); \ if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -#define rcu_note_voluntary_context_switch(t) do { } while (0) +#define rcu_note_voluntary_context_switch(t) rcu_all_qs() #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 0e5366200154..fabd3fad8516 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -154,7 +154,10 @@ static inline bool rcu_is_watching(void) return true; } - #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ +static inline void rcu_all_qs(void) +{ +} + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 52953790dcca..ddba927f7316 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -97,4 +97,6 @@ extern int rcu_scheduler_active __read_mostly; bool rcu_is_watching(void); +void rcu_all_qs(void); + #endif /* __LINUX_RCUTREE_H */ -- cgit v1.2.3 From 076c3b8e03e2737659a89660bb8e54e13587d974 Mon Sep 17 00:00:00 2001 From: James Ban Date: Fri, 16 Jan 2015 12:13:27 +0900 Subject: regulator: da9211: fix unmatched of_node This is a patch for fixing unmatched of_node. Signed-off-by: James Ban Signed-off-by: Mark Brown --- include/linux/regulator/da9211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 5479394fefce..d1d9d3849bdb 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -32,6 +32,7 @@ struct da9211_pdata { * 2 : 2 phase 2 buck */ int num_buck; + struct device_node *reg_node[DA9211_MAX_REGULATORS]; struct regulator_init_data *init_data[DA9211_MAX_REGULATORS]; }; #endif -- cgit v1.2.3 From 67c2b9cb30f561325b010e046b7bbe2a327e69a0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Nov 2014 12:18:18 +0100 Subject: ARM: 8207/1: amba: Use inlines instead of macros for amba_pclk_enable/disable Replace the amba_pclk_enable and amba_pclk_disable macros with static inline functions and remove checks for IS_ERR. The amba bus clock won't be ERR because probe would fail before the use of these functions. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Russell King --- include/linux/amba/bus.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 2afc618b15ce..0ab5f8e0dea2 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -92,11 +92,15 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -#define amba_pclk_enable(d) \ - (IS_ERR((d)->pclk) ? 0 : clk_enable((d)->pclk)) +static inline int amba_pclk_enable(struct amba_device *dev) +{ + return clk_enable(dev->pclk); +} -#define amba_pclk_disable(d) \ - do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) +static inline void amba_pclk_disable(struct amba_device *dev) +{ + clk_disable(dev->pclk); +} static inline int amba_pclk_prepare(struct amba_device *dev) { -- cgit v1.2.3 From 78e1f974dd351ac82d978e3aac2d27422013f914 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Dec 2014 02:37:09 +0200 Subject: iommu/ipmmu-vmsa: Remove platform data support No board file instantiates the IPMMU using platform data. Now that we have DT support, get rid of platform data. Signed-off-by: Laurent Pinchart --- include/linux/platform_data/ipmmu-vmsa.h | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 include/linux/platform_data/ipmmu-vmsa.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ipmmu-vmsa.h b/include/linux/platform_data/ipmmu-vmsa.h deleted file mode 100644 index 5275b3ac6d37..000000000000 --- a/include/linux/platform_data/ipmmu-vmsa.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * IPMMU VMSA Platform Data - * - * Copyright (C) 2014 Renesas Electronics Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - */ - -#ifndef __IPMMU_VMSA_H__ -#define __IPMMU_VMSA_H__ - -struct ipmmu_vmsa_master { - const char *name; - unsigned int utlb; -}; - -struct ipmmu_vmsa_platform_data { - const struct ipmmu_vmsa_master *masters; - unsigned int num_masters; -}; - -#endif /* __IPMMU_VMSA_H__ */ -- cgit v1.2.3 From f331a859e0ee5a898c1f47596eddad4c4f02d657 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Jan 2015 18:16:04 -0600 Subject: PCI: Add flag for devices where we can't use bus reset Enable a mechanism for devices to quirk that they do not behave when doing a PCI bus reset. We require a modest level of spec compliant behavior in order to do a reset, for instance the device should come out of reset without throwing errors and PCI config space should be accessible after reset. This is too much to ask for some devices. Link: http://lkml.kernel.org/r/20140923210318.498dacbd@dualc.maya.org Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.14+ --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 360a966a97a5..44627f1df4ca 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -175,6 +175,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4), /* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */ PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5), + /* Do not use bus resets for device */ + PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From 8505e729a2f6eb0803ff943a15f133dd10afff3a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 15 Jan 2015 16:21:49 -0600 Subject: PCI: Add pci_claim_bridge_resource() to clip window if necessary Add pci_claim_bridge_resource() to claim a PCI-PCI bridge window. This is like regular pci_claim_resource(), except that if we fail to claim the window, we check to see if we can reduce the size of the window and try again. This is for scenarios like this: pci_bus 0000:00: root bus resource [mem 0xc0000000-0xffffffff] pci 0000:00:01.0: bridge window [mem 0xbdf00000-0xddefffff 64bit pref] pci 0000:01:00.0: reg 0x10: [mem 0xc0000000-0xcfffffff pref] The 00:01.0 window is illegal: it starts before the host bridge window, so we have to assume the [0xbdf00000-0xbfffffff] region is inaccessible. We can make it legal by clipping it to [mem 0xc0000000-0xddefffff 64bit pref]. Previously we discarded the 00:01.0 window and tried to reassign that part of the hierarchy from scratch. That is a problem because Linux doesn't always assign things optimally. For example, in this case, BIOS put the 01:00.0 device in a prefetchable window below 4GB, but after 5b28541552ef, Linux puts the prefetchable window above 4GB where the 32-bit 01:00.0 device can't use it. Clipping the 00:01.0 window is less intrusive than completely reassigning things and is sufficient to let us use most of the BIOS configuration. Of course, it's possible that devices below 00:01.0 will no longer fit. If that's the case, we'll have to reassign things. But that's a separate problem. [bhelgaas: changelog, split into separate patch] Link: https://bugzilla.kernel.org/show_bug.cgi?id=85491 Reported-by: Marek Kordik Fixes: 5b28541552ef ("PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources") Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.16+ --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 44627f1df4ca..9603094ed59b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1067,6 +1067,7 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); void pci_bus_assign_resources(const struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); int pci_claim_resource(struct pci_dev *, int); +int pci_claim_bridge_resource(struct pci_dev *bridge, int i); void pci_assign_unassigned_resources(void); void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pci_assign_unassigned_bus_resources(struct pci_bus *bus); -- cgit v1.2.3 From 51e537387990dc1f00752103f314fd135cb94bc6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Nov 2014 11:24:08 -0700 Subject: PCI: Add flag for devices that don't reset on D3hot->D0 transition Per the PCI Power Management spec r1.2, sec 3.2.4, a device that advertises No_Soft_Reset == 0 in the PMCSR register (reported by lspci as "NoSoftRst-") should perform an internal reset when transitioning from D3hot to D0 via software control. Configuration context is lost and the device requires a full reinitialization sequence. Unfortunately the definition of "internal reset", beyond the application of the configuration context, is largely left to the interpretation of the specific device. Some devices don't seem to perform an "internal reset" even if they report No_Soft_Reset == 0. We still need to honor the PCI specification and restore PCI config context in the event that we do a PM reset, so we don't cache and modify the PCI_PM_CTRL_NO_SOFT_RESET bit for the device, but for interfaces where the intention is to reset the device, like pci_reset_function(), we need a mechanism to flag that PM reset (a D3hot->D0 transition) doesn't perform any significant "internal reset" of the device. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 44627f1df4ca..7bed32b3fd54 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -177,6 +177,8 @@ enum pci_dev_flags { PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5), /* Do not use bus resets for device */ PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6), + /* Do not use PM reset even if device advertises NoSoftRst- */ + PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From 6dee60f69d48fcef021b4b53b3431797ec440764 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:54 -0500 Subject: locks: add new struct list_head to struct file_lock ...that we can use to queue file_locks to per-ctx list_heads. Go ahead and convert locks_delete_lock and locks_dispose_list to use it instead of the fl_block list. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 42efe13077b6..cd6818115162 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -934,6 +934,7 @@ int locks_in_grace(struct net *); */ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ + struct list_head fl_list; /* link into file_lock_context */ struct hlist_node fl_link; /* node in global lists */ struct list_head fl_block; /* circular list of blocked processes */ fl_owner_t fl_owner; -- cgit v1.2.3 From 4a075e39c86490cc0f0c10ac6abe3592d1689463 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:54 -0500 Subject: locks: add a new struct file_locking_context pointer to struct inode The current scheme of using the i_flock list is really difficult to manage. There is also a legitimate desire for a per-inode spinlock to manage these lists that isn't the i_lock. Start conversion to a new scheme to eventually replace the old i_flock list with a new "file_lock_context" object. We start by adding a new i_flctx to struct inode. For now, it lives in parallel with i_flock list, but will eventually replace it. The idea is to allocate a structure to sit in that pointer and act as a locus for all things file locking. We allocate a file_lock_context for an inode when the first lock is added to it, and it's only freed when the inode is freed. We use the i_lock to protect the assignment, but afterward it should mostly be accessed locklessly. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cd6818115162..dec0d38b05de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -626,6 +626,7 @@ struct inode { #endif const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ struct file_lock *i_flock; + struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; union { @@ -965,6 +966,10 @@ struct file_lock { } fl_u; }; +struct file_lock_context { + struct list_head flc_flock; +}; + /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) @@ -991,6 +996,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); /* fs/locks.c */ +void locks_free_lock_context(struct file_lock_context *ctx); void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); @@ -1048,6 +1054,11 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline void +locks_free_lock_context(struct file_lock_context *ctx) +{ +} + static inline void locks_init_lock(struct file_lock *fl) { return; -- cgit v1.2.3 From bd61e0a9c852de2d705b6f1bb2cc54c5774db570 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:55 -0500 Subject: locks: convert posix locks to file_lock_context Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index dec0d38b05de..571f113588e9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -968,6 +968,7 @@ struct file_lock { struct file_lock_context { struct list_head flc_flock; + struct list_head flc_posix; }; /* The following constant reflects the upper bound of the file/locking space */ @@ -1971,7 +1972,7 @@ static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) { - if (inode->i_flock && mandatory_lock(inode)) + if (inode->i_flctx && mandatory_lock(inode)) return locks_mandatory_area( FLOCK_VERIFY_WRITE, inode, filp, size < inode->i_size ? size : inode->i_size, -- cgit v1.2.3 From 8634b51f6ca298fb8b07aa4847340764903533ab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:55 -0500 Subject: locks: convert lease handling to file_lock_context Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 571f113588e9..2ddec3cf81b9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -969,6 +969,7 @@ struct file_lock { struct file_lock_context { struct list_head flc_flock; struct list_head flc_posix; + struct list_head flc_lease; }; /* The following constant reflects the upper bound of the file/locking space */ @@ -1990,7 +1991,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode) * end up racing with tasks trying to set a new lease on this file. */ smp_mb(); - if (inode->i_flock) + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, mode, FL_LEASE); return 0; } @@ -2003,7 +2004,7 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) * end up racing with tasks trying to set a new lease on this file. */ smp_mb(); - if (inode->i_flock) + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, mode, FL_DELEG); return 0; } -- cgit v1.2.3 From a7231a97467d5a0c36f82f581c76c12c034e4b80 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:56 -0500 Subject: locks: remove i_flock field from struct inode Nothing uses it anymore. Also add a forward declaration for struct file_lock to silence some compiler warnings that the removal triggers. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ddec3cf81b9..ce0873af0b97 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -625,7 +625,6 @@ struct inode { atomic_t i_readcount; /* struct files open RO */ #endif const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct file_lock *i_flock; struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; @@ -886,6 +885,8 @@ static inline struct file *get_file(struct file *f) /* legacy typedef, should eventually be removed */ typedef void *fl_owner_t; +struct file_lock; + struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); -- cgit v1.2.3 From 6109c85037e53443f29fd39c0de69f578a1cf285 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:57 -0500 Subject: locks: add a dedicated spinlock to protect i_flctx lists We can now add a dedicated spinlock without expanding struct inode. Change to using that to protect the various i_flctx lists. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ce0873af0b97..32eafa9b5c9f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -968,6 +968,7 @@ struct file_lock { }; struct file_lock_context { + spinlock_t flc_lock; struct list_head flc_flock; struct list_head flc_posix; struct list_head flc_lease; -- cgit v1.2.3 From 7448cc37b1a6b620d948aaee3bb30960c06d5d5d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:57 -0500 Subject: locks: clean up the lm_change prototype Now that we use standard list_heads for tracking leases, we can have lm_change take a pointer to the lease to be modified instead of a double pointer. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 32eafa9b5c9f..94e706a0a408 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -900,7 +900,7 @@ struct lock_manager_operations { void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); bool (*lm_break)(struct file_lock *); - int (*lm_change)(struct file_lock **, int, struct list_head *); + int (*lm_change)(struct file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); }; @@ -1021,7 +1021,7 @@ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int t extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); extern int vfs_setlease(struct file *, long, struct file_lock **, void **); -extern int lease_modify(struct file_lock **, int, struct list_head *); +extern int lease_modify(struct file_lock *, int, struct list_head *); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1153,7 +1153,7 @@ static inline int vfs_setlease(struct file *filp, long arg, return -EINVAL; } -static inline int lease_modify(struct file_lock **before, int arg, +static inline int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose) { return -EINVAL; -- cgit v1.2.3 From 9bd0f45b7037fcfa8b575c7e27d0431d6e6dc3bb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:57 -0500 Subject: locks: keep a count of locks on the flctx lists This makes things a bit more efficient in the cifs and ceph lock pushing code. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 94e706a0a408..f87cb2f03103 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -972,6 +972,9 @@ struct file_lock_context { struct list_head flc_flock; struct list_head flc_posix; struct list_head flc_lease; + int flc_flock_cnt; + int flc_posix_cnt; + int flc_lease_cnt; }; /* The following constant reflects the upper bound of the file/locking space */ -- cgit v1.2.3 From ee1c244219fd652964710a6cc3e4f922e86aa492 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 11:37:14 +0100 Subject: genetlink: synchronize socket closing and family removal In addition to the problem Jeff Layton reported, I looked at the code and reproduced the same warning by subscribing and removing the genl family with a socket still open. This is a fairly tricky race which originates in the fact that generic netlink allows the family to go away while sockets are still open - unlike regular netlink which has a module refcount for every open socket so in general this cannot be triggered. Trying to resolve this issue by the obvious locking isn't possible as it will result in deadlocks between unregistration and group unbind notification (which incidentally lockdep doesn't find due to the home grown locking in the netlink table.) To really resolve this, introduce a "closing socket" reference counter (for generic netlink only, as it's the only affected family) in the core netlink code and use that in generic netlink to wait for all the sockets that are being closed at the same time as a generic netlink family is removed. This fixes the race that when a socket is closed, it will should call the unbind, but if the family is removed at the same time the unbind will not find it, leading to the warning. The real problem though is that in this case the unbind could actually find a new family that is registered to have a multicast group with the same ID, and call its mcast_unbind() leading to confusing. Also remove the warning since it would still trigger, but is now no longer a problem. This also moves the code in af_netlink.c to before unreferencing the module to avoid having the same problem in the normal non-genl case. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/genetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 55b685719d52..09460d6d6682 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -11,6 +11,10 @@ extern void genl_unlock(void); extern int lockdep_genl_is_held(void); #endif +/* for synchronisation between af_netlink and genetlink */ +extern atomic_t genl_sk_destructing_cnt; +extern wait_queue_head_t genl_sk_destructing_waitq; + /** * rcu_dereference_genl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing -- cgit v1.2.3 From 9d5438f462abd6398cdb7b3211bdcec271873a3b Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Fri, 16 Jan 2015 16:01:10 -0800 Subject: pstore: Add pmsg - user-space accessible pstore object A secured user-space accessible pstore object. Writes to /dev/pmsg0 are appended to the buffer, on reboot the persistent contents are available in /sys/fs/pstore/pmsg-ramoops-[ID]. One possible use is syslogd, or other daemon, can write messages, then on reboot provides a means to triage user-space activities leading up to a panic as a companion to the pstore dmesg or console logs. Signed-off-by: Mark Salyzyn Acked-by: Kees Cook Signed-off-by: Tony Luck --- include/linux/pstore.h | 1 + include/linux/pstore_ram.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index ece0c6bbfcc5..8884f6e507f7 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -39,6 +39,7 @@ enum pstore_type_id { PSTORE_TYPE_PPC_RTAS = 4, PSTORE_TYPE_PPC_OF = 5, PSTORE_TYPE_PPC_COMMON = 6, + PSTORE_TYPE_PMSG = 7, PSTORE_TYPE_UNKNOWN = 255 }; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 4af3fdc85b01..9c9d6c154c8e 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -81,6 +81,7 @@ struct ramoops_platform_data { unsigned long record_size; unsigned long console_size; unsigned long ftrace_size; + unsigned long pmsg_size; int dump_oops; struct persistent_ram_ecc_info ecc_info; }; -- cgit v1.2.3 From b9626f32876debc356fadb6e19aebcfe9d70c5ff Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:51 +0100 Subject: tpm/tpm_i2c_stm_st33: Add new tpm_stm_dev structure and remove tpm_i2c_buffer[0], [1] buffer. In order to clean big buffers in st33zp24_platform_data structure, replace with tpm_stm_dev for driver internal usage. As only one buffer is really necessary replace with buf field. In the mean time move tpm_i2c_stm_st33.h to include/linux/platform_data. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- include/linux/platform_data/tpm_i2c_stm_st33.h | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/linux/platform_data/tpm_i2c_stm_st33.h (limited to 'include/linux') diff --git a/include/linux/platform_data/tpm_i2c_stm_st33.h b/include/linux/platform_data/tpm_i2c_stm_st33.h new file mode 100644 index 000000000000..88f9cb18113a --- /dev/null +++ b/include/linux/platform_data/tpm_i2c_stm_st33.h @@ -0,0 +1,40 @@ +/* + * STMicroelectronics TPM I2C Linux driver for TPM ST33ZP24 + * Copyright (C) 2009, 2010 STMicroelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * STMicroelectronics version 1.2.0, Copyright (C) 2010 + * STMicroelectronics comes with ABSOLUTELY NO WARRANTY. + * This is free software, and you are welcome to redistribute it + * under certain conditions. + * + * @Author: Christophe RICARD tpmsupport@st.com + * + * @File: stm_st33_tpm_i2c.h + * + * @Date: 09/15/2010 + */ +#ifndef __STM_ST33_TPM_I2C_MAIN_H__ +#define __STM_ST33_TPM_I2C_MAIN_H__ + + +#define TPM_ST33_I2C "st33zp24_i2c" + +struct st33zp24_platform_data { + int io_serirq; + int io_lpcpd; +}; + +#endif /* __STM_ST33_TPM_I2C_MAIN_H__ */ -- cgit v1.2.3 From 76182b6b008b694d095aec1e2eb6c07fae787128 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:52 +0100 Subject: tpm/tpm_i2c_stm_st33: Remove reference to io_serirq The serirq gpio pin is used only as interrupt. After driver initialization, the serirq signal is always used through interrupt and never with gpio kernel API. The irq can then be initialized during the platform_data definition within the client->irq pin. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- include/linux/platform_data/tpm_i2c_stm_st33.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/tpm_i2c_stm_st33.h b/include/linux/platform_data/tpm_i2c_stm_st33.h index 88f9cb18113a..85775cf5f9a5 100644 --- a/include/linux/platform_data/tpm_i2c_stm_st33.h +++ b/include/linux/platform_data/tpm_i2c_stm_st33.h @@ -33,7 +33,6 @@ #define TPM_ST33_I2C "st33zp24_i2c" struct st33zp24_platform_data { - int io_serirq; int io_lpcpd; }; -- cgit v1.2.3 From 3eda7d0ea3a0365aa72a2007f9450f314d92f065 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 Jan 2015 23:13:13 +0100 Subject: tpm/tpm_i2c_stm_st33: Change tpm_i2c_stm_st33.h to tpm_stm_st33.h include/linux/platform_data/tpm_i2c_stm_st33.h can be used by other st33 tpm device driver not using i2c protocol. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- include/linux/platform_data/tpm_i2c_stm_st33.h | 39 -------------------------- include/linux/platform_data/tpm_stm_st33.h | 39 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 39 deletions(-) delete mode 100644 include/linux/platform_data/tpm_i2c_stm_st33.h create mode 100644 include/linux/platform_data/tpm_stm_st33.h (limited to 'include/linux') diff --git a/include/linux/platform_data/tpm_i2c_stm_st33.h b/include/linux/platform_data/tpm_i2c_stm_st33.h deleted file mode 100644 index 85775cf5f9a5..000000000000 --- a/include/linux/platform_data/tpm_i2c_stm_st33.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * STMicroelectronics TPM I2C Linux driver for TPM ST33ZP24 - * Copyright (C) 2009, 2010 STMicroelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - * STMicroelectronics version 1.2.0, Copyright (C) 2010 - * STMicroelectronics comes with ABSOLUTELY NO WARRANTY. - * This is free software, and you are welcome to redistribute it - * under certain conditions. - * - * @Author: Christophe RICARD tpmsupport@st.com - * - * @File: stm_st33_tpm_i2c.h - * - * @Date: 09/15/2010 - */ -#ifndef __STM_ST33_TPM_I2C_MAIN_H__ -#define __STM_ST33_TPM_I2C_MAIN_H__ - - -#define TPM_ST33_I2C "st33zp24_i2c" - -struct st33zp24_platform_data { - int io_lpcpd; -}; - -#endif /* __STM_ST33_TPM_I2C_MAIN_H__ */ diff --git a/include/linux/platform_data/tpm_stm_st33.h b/include/linux/platform_data/tpm_stm_st33.h new file mode 100644 index 000000000000..ff75310c0f47 --- /dev/null +++ b/include/linux/platform_data/tpm_stm_st33.h @@ -0,0 +1,39 @@ +/* + * STMicroelectronics TPM I2C Linux driver for TPM ST33ZP24 + * Copyright (C) 2009, 2010 STMicroelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * STMicroelectronics version 1.2.0, Copyright (C) 2010 + * STMicroelectronics comes with ABSOLUTELY NO WARRANTY. + * This is free software, and you are welcome to redistribute it + * under certain conditions. + * + * @Author: Christophe RICARD tpmsupport@st.com + * + * @File: stm_st33_tpm.h + * + * @Date: 09/15/2010 + */ +#ifndef __STM_ST33_TPM_H__ +#define __STM_ST33_TPM_H__ + +#define TPM_ST33_I2C "st33zp24-i2c" +#define TPM_ST33_SPI "st33zp24-spi" + +struct st33zp24_platform_data { + int io_lpcpd; +}; + +#endif /* __STM_ST33_TPM_H__ */ -- cgit v1.2.3 From 3aeb66176ffa8fefd7a9f7d37bda1d8adcf469a1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 23:49:37 +0100 Subject: net: replace br_fdb_external_learn_* calls with switchdev notifier events This patch benefits from newly introduced switchdev notifier and uses it to propagate fdb learn events from rocker driver to bridge. That avoids direct function calls and possible use by other listeners (ovs). Suggested-by: Thomas Graf Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 0a8ce762a47f..a57bca2ea97e 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -50,24 +50,6 @@ extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __use typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; -#if IS_ENABLED(CONFIG_BRIDGE) -int br_fdb_external_learn_add(struct net_device *dev, - const unsigned char *addr, u16 vid); -int br_fdb_external_learn_del(struct net_device *dev, - const unsigned char *addr, u16 vid); -#else -static inline int br_fdb_external_learn_add(struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return 0; -} -static inline int br_fdb_external_learn_del(struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); -- cgit v1.2.3 From f8c58c1136349fdfa9b605c501f2f911622d3a9a Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Tue, 2 Dec 2014 15:42:47 -0800 Subject: mmc: dw_mmc: Protect read-modify-write of INTMASK with a lock We're running into cases where our enabling of the SDIO interrupt in dw_mmc doesn't actually take effect. Specifically, adding patch like this: +++ b/drivers/mmc/host/dw_mmc.c @@ -1076,6 +1076,9 @@ static void dw_mci_enable_sdio_irq(struct mmc_host *mmc, int enb) mci_writel(host, INTMASK, (int_mask | SDMMC_INT_SDIO(slot->id))); + int_mask = mci_readl(host, INTMASK); + if (!(int_mask & SDMMC_INT_SDIO(slot->id))) + dev_err(&mmc->class_dev, "failed to enable sdio irq\n"); } else { ...actually triggers the error message. That's because the dw_mci_enable_sdio_irq() unsafely does a read-modify-write of the INTMASK register. We can't just use the standard host->lock since that lock is not irq safe and mmc_signal_sdio_irq() (called from interrupt context) calls dw_mci_enable_sdio_irq(). Add a new irq-safe lock to protect INTMASK. An alternate solution to this is to punt mmc_signal_sdio_irq() to the tasklet and then protect INTMASK modifications by the standard host lock. This seemed like a bit more of a high-latency change. Reported-by: Bing Zhao Signed-off-by: Doug Anderson Reviewed-by: James Hogan Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 42b724e8d503..471fb3116dbe 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -106,6 +106,11 @@ struct mmc_data; * @cur_slot, @mrq and @state. These must always be updated * at the same time while holding @lock. * + * @irq_lock is an irq-safe spinlock protecting the INTMASK register + * to allow the interrupt handler to modify it directly. Held for only long + * enough to read-modify-write INTMASK and no other locks are grabbed when + * holding this one. + * * The @mrq field of struct dw_mci_slot is also protected by @lock, * and must always be written at the same time as the slot is added to * @queue. @@ -125,6 +130,7 @@ struct mmc_data; */ struct dw_mci { spinlock_t lock; + spinlock_t irq_lock; void __iomem *regs; struct scatterlist *sg; -- cgit v1.2.3 From 61bd8a04b379e4fefbc0b0d06941d3f91071bb8c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Dec 2014 08:43:08 +1100 Subject: mmc: omap_hsmmc: remove prepare/complete system suspend support. The only function of these 'prepare' and 'complete' is to disable the 'card detect' irq during suspend. The commit which added this, commit a48ce884d5819d5df2cf1139ab3c43f8e9e419b3 mmc: omap_hsmmc: Introduce omap_hsmmc_prepare/complete justified it by the need to avoid the registration of new devices during suspend. However mmc_pm_notify will set ->rescan_disable in the 'prepare' stage and clear it in the 'complete' stage, so no card detection will actually happen. Also the interrupt will be disabled before final suspend as part of common suspend processing. So this disabling of the interrupt is unnecessary, and interferes with a transition to using common code for card-detect management. Cc: Felipe Balbi Cc: Venkatraman S Cc: Chris Ball Signed-off-by: NeilBrown Signed-off-by: Ulf Hansson --- include/linux/platform_data/mmc-omap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h index 5c188f4e9bec..929469291406 100644 --- a/include/linux/platform_data/mmc-omap.h +++ b/include/linux/platform_data/mmc-omap.h @@ -31,10 +31,6 @@ struct omap_mmc_platform_data { void (*cleanup)(struct device *dev); void (*shutdown)(struct device *dev); - /* To handle board related suspend/resume functionality for MMC */ - int (*suspend)(struct device *dev, int slot); - int (*resume)(struct device *dev, int slot); - /* Return context loss count due to PM states changing */ int (*get_context_loss_count)(struct device *dev); -- cgit v1.2.3 From eddbc3abc5bf11bdfc92ef84fd97ec4d379b7278 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 18 Dec 2014 15:44:32 +0100 Subject: mmc: slot-gpio: Remove option to explicitly free requested CD/WP GPIOs The slot-gpio uses the devm*_ managed functions. Still it provide APIs to explicitly free requested CD/WP GPIOs, but these API isn't being used. Therefore let's simplify slot-gpio by removing these unused APIs. If it later turns out we need some of them, we can always consider to restore the code. Signed-off-by: Ulf Hansson --- include/linux/mmc/slot-gpio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index e56fa24c9322..4a36d6954631 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -15,12 +15,10 @@ struct mmc_host; int mmc_gpio_get_ro(struct mmc_host *host); int mmc_gpio_request_ro(struct mmc_host *host, unsigned int gpio); -void mmc_gpio_free_ro(struct mmc_host *host); int mmc_gpio_get_cd(struct mmc_host *host); int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio, unsigned int debounce); -void mmc_gpio_free_cd(struct mmc_host *host); int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, @@ -28,7 +26,6 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce, bool *gpio_invert); -void mmc_gpiod_free_cd(struct mmc_host *host); void mmc_gpiod_request_cd_irq(struct mmc_host *host); #endif -- cgit v1.2.3 From df8aca162e5ff2b20c7a4de3e64e5b96ff838ab0 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 18 Dec 2014 15:44:36 +0100 Subject: mmc: slot-gpio: Rework how to handle allocation of slot-gpio data By moving the allocation of the slot-gpio data into mmc_alloc_host(), we can remove the slot-gpio internal calls to mmc_gpio_alloc(). This means mmc_gpio_alloc() has now only one caller left, which consequence allow us to simplify and remove some of the slot-gpio code. Additionally, this makes the slot-gpio mutex redundant, so let's remove it. Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 9f322706f7cb..b6bf718c3498 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -166,7 +166,6 @@ struct mmc_async_req { * struct mmc_slot - MMC slot functions * * @cd_irq: MMC/SD-card slot hotplug detection IRQ or -EINVAL - * @lock: protect the @handler_priv pointer * @handler_priv: MMC/SD-card slot context * * Some MMC/SD host controllers implement slot-functions like card and @@ -176,7 +175,6 @@ struct mmc_async_req { */ struct mmc_slot { int cd_irq; - struct mutex lock; void *handler_priv; }; -- cgit v1.2.3 From 04cdbbfa73ebac57a30ec2ebebfd7e9342bbdc44 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 1 Dec 2014 16:53:34 +0100 Subject: mmc: core: Make tuning block patterns static Since previous patches removed the need for the tuning block patterns to be exported, let's move them close to the mmc_send_tuning() API. Those are now intended to be used only by the mmc core. Signed-off-by: Ulf Hansson Reviewed-by: Stephen Boyd Acked-by: Jaehoon Chung --- include/linux/mmc/mmc.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 49ad7a943638..fb97b5cc91cd 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -53,11 +53,6 @@ #define MMC_SEND_TUNING_BLOCK 19 /* adtc R1 */ #define MMC_SEND_TUNING_BLOCK_HS200 21 /* adtc R1 */ -#define MMC_TUNING_BLK_PATTERN_4BIT_SIZE 64 -#define MMC_TUNING_BLK_PATTERN_8BIT_SIZE 128 -extern const u8 tuning_blk_pattern_4bit[MMC_TUNING_BLK_PATTERN_4BIT_SIZE]; -extern const u8 tuning_blk_pattern_8bit[MMC_TUNING_BLK_PATTERN_8BIT_SIZE]; - /* class 3 */ #define MMC_WRITE_DAT_UNTIL_STOP 20 /* adtc [31:0] data addr R1 */ -- cgit v1.2.3 From 348487cb28e66b032bae1b38424d81bf5b444408 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 9 Dec 2014 17:04:05 +0800 Subject: mmc: sdhci: use pipeline mmc requests to improve performance This patch is based on the patches by Per Forlin, Tony Lin and Ryan QIAN. This patch complete the API 'post_req' and 'pre_req' in sdhci host side, Test Env: 1. i.MX6Q-SABREAUTO board, CPU @ 996MHz, use ADMA in uSDHC controller. 2. Test command: $ echo 1 > /proc/sys/vm/drop_caches write to sd card: $ dd if=/dev/zero of=/dev/mmcblk0 bs=1M count=2000 conv=fsync read the sd card: $ dd if=/dev/mmcblk0 of=/dev/null bs=1M count=2000 3. TOSHIBA 16GB SD3.0 card, running at 4 bit, SDR104 @ 198MHZ Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~76.7 MB/s | ~23.3 MB/s | |------------------------------------------------ |without this patch | ~60.5 MB/s | ~22.5 MB/s | ------------------------------------------------- 4. SanDisk 8GB SD3.0 card, running at 4 bit, DDR50 @ 50MHZ Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~40.5 MB/s | ~15.6 MB/s | |------------------------------------------------ |without this patch | ~36.1 MB/s | ~14.1 MB/s | ------------------------------------------------- 5. Kingston 8GB SD2.0 card, running at 4 bit, High-speed @ 50MHZ Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~22.7 MB/s | ~8.2 MB/s | |------------------------------------------------ |without this patch | ~21.3 MB/s | ~8.0 MB/s | ------------------------------------------------- 6. About eMMC, Sandisk 8GB eMMC on i.MX6DL-sabresd board, CPU @ 792MHZ, eMMC running at 8 bit, DDR52 @ 52MHZ. Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~37.3 MB/s | ~10.5 MB/s | |------------------------------------------------ |without this patch | ~33.4 MB/s | ~10.5 MB/s | ------------------------------------------------- Signed-off-by: Haibo Chen Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index f767a0de611f..cb8b94ff6a26 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -17,6 +17,11 @@ #include #include +struct sdhci_host_next { + unsigned int sg_count; + s32 cookie; +}; + struct sdhci_host { /* Data set by hardware interface driver */ const char *hw_name; /* Hardware bus name */ @@ -203,6 +208,7 @@ struct sdhci_host { #define SDHCI_TUNING_MODE_1 0 struct timer_list tuning_timer; /* Timer for tuning */ + struct sdhci_host_next next_data; unsigned long private[0] ____cacheline_aligned; }; #endif /* LINUX_MMC_SDHCI_H */ -- cgit v1.2.3 From 83533ab28380f6957af39a7b322e639e42dbdaf1 Mon Sep 17 00:00:00 2001 From: Johan Rudholm Date: Mon, 12 Jan 2015 15:38:04 +0100 Subject: mmc: core: always check status after reset Always check if the card is alive after a successful reset. This allows us to remove mmc_hw_reset_check(), leaving mmc_hw_reset() as the only card reset interface. Signed-off-by: Johan Rudholm Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index cb2b0400d284..160448f920ac 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -182,7 +182,6 @@ extern int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen); extern int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount, bool is_rel_write); extern int mmc_hw_reset(struct mmc_host *host); -extern int mmc_hw_reset_check(struct mmc_host *host); extern int mmc_can_reset(struct mmc_card *card); extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); -- cgit v1.2.3 From c7ea834d81904b71505093f7ec50d036132cf628 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 13 Jan 2015 08:23:18 +1300 Subject: mmc: slot-gpio: Allow host driver to provide isr for card-detect interrupts One of the reasons omap_hsmmc doesn't use the slot-gpio library is that it has some non-standard functionality in the card-detect interrupt service routine. To make it possible for omap_hsmmc (and maybe others) to be converted to use slot-gpio, add 'mmc_gpio_request_cd_isr' which provide an alternate isr to be register by the slot-gpio code. Signed-off-by: NeilBrown Signed-off-by: Ulf Hansson --- include/linux/mmc/slot-gpio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 4a36d6954631..3945a8c9d3cb 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -26,6 +26,8 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce, bool *gpio_invert); +void mmc_gpio_set_cd_isr(struct mmc_host *host, + irqreturn_t (*isr)(int irq, void *dev_id)); void mmc_gpiod_request_cd_irq(struct mmc_host *host); #endif -- cgit v1.2.3 From 85b4545629663486b7f71047ce3b54fa0ad3eb28 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:14 +0000 Subject: iommu: Consolidate IOVA allocator code In order to share the IOVA allocator with other architectures, break the unnecssary dependency on the Intel IOMMU driver and move the remaining IOVA internals to iova.c Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 19e81d5ccb6d..ad0507c61cc7 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -39,6 +39,9 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +int iommu_iova_cache_init(void); +void iommu_iova_cache_destroy(void); + struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); -- cgit v1.2.3 From 1b72250076dde4276acecf3a7da722b185703e78 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:15 +0000 Subject: iommu: Make IOVA domain low limit flexible To share the IOVA allocator with other architectures, it needs to accommodate more general aperture restrictions; move the lower limit from a compile-time constant to a runtime domain property to allow IOVA domains with different requirements to co-exist. Also reword the slightly unclear description of alloc_iova since we're touching it anyway. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index ad0507c61cc7..591b19626b46 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -16,9 +16,6 @@ #include #include -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - /* iova structure */ struct iova { struct rb_node node; @@ -31,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -52,7 +50,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, + unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From 0fb5fe874c42942e16c450ae05da453e13a1c09e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:16 +0000 Subject: iommu: Make IOVA domain page size explicit Systems may contain heterogeneous IOMMUs supporting differing minimum page sizes, which may also not be common with the CPU page size. Thus it is practical to have an explicit notion of IOVA granularity to simplify handling of mapping and allocation constraints. As an initial step, move the IOVA page granularity from an implicit compile-time constant to a per-domain property so we can make use of it in IOVA domain context at runtime. To keep the abstraction tidy, extend the little API of inline iova_* helpers to parallel some of the equivalent PAGE_* macros. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 591b19626b46..3920a19d8194 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -37,6 +38,36 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +static inline unsigned long iova_shift(struct iova_domain *iovad) +{ + return __ffs(iovad->granule); +} + +static inline unsigned long iova_mask(struct iova_domain *iovad) +{ + return iovad->granule - 1; +} + +static inline size_t iova_offset(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova & iova_mask(iovad); +} + +static inline size_t iova_align(struct iova_domain *iovad, size_t size) +{ + return ALIGN(size, iovad->granule); +} + +static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) +{ + return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); +} + +static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova >> iova_shift(iovad); +} + int iommu_iova_cache_init(void); void iommu_iova_cache_destroy(void); @@ -50,8 +81,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, - unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, -- cgit v1.2.3 From 72dd299d5039a336493993dcc63413cf31d0e662 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Jan 2015 15:13:02 -0800 Subject: libata: allow sata_sil24 to opt-out of tag ordered submission Ronny reports: https://bugzilla.kernel.org/show_bug.cgi?id=87101 "Since commit 8a4aeec8d "libata/ahci: accommodate tag ordered controllers" the access to the harddisk on the first SATA-port is failing on its first access. The access to the harddisk on the second port is working normal. When reverting the above commit, access to both harddisks is working fine again." Maintain tag ordered submission as the default, but allow sata_sil24 to continue with the old behavior. Cc: Cc: Tejun Heo Reported-by: Ronny Hegewald Signed-off-by: Dan Williams Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index f2b440e44fd7..91f705de2c0b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -231,6 +231,7 @@ enum { ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ + ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.3 From c7d7ddee7e24eedde6149eefbcfbfbc7125b9ff0 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 15 Jan 2015 15:09:37 +0100 Subject: ata: libahci: Allow using multiple regulators The current implementation of the libahci allows using multiple PHYs but not multiple regulators. This patch adds the support of multiple regulators. Until now it was mandatory to have a PHY under a subnode, now a port subnode can contain either a regulator or a PHY (or both). In order to be able to asociate a port with a regulator the port are now a platform device in the device tree case. There was only one driver which used directly the regulator field of the ahci_host_priv structure. To preserve the bisectability the change in the ahci_imx driver was done in the same patch. Signed-off-by: Gregory CLEMENT Acked-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/ahci_platform.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 642d6ae4030c..f65b33809170 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -24,6 +24,8 @@ struct platform_device; int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); +int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv); +void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); void ahci_platform_disable_resources(struct ahci_host_priv *hpriv); struct ahci_host_priv *ahci_platform_get_resources( -- cgit v1.2.3 From 54c523127bcca986c6f9b04c7b56a949ea011899 Mon Sep 17 00:00:00 2001 From: Matt Wagantall Date: Mon, 15 Dec 2014 23:47:23 +0000 Subject: iopoll: Introduce memory-mapped IO polling macros It is sometimes necessary to poll a memory-mapped register until its value satisfies some condition. Introduce a family of convenience macros that do this. Tight-looping, sleeping, and timing out can all be accomplished using these macros. Cc: Thierry Reding Cc: Andrew Morton Cc: Robert Elliott Acked-by: Arnd Bergmann Acked-by: Will Deacon Signed-off-by: Matt Wagantall Signed-off-by: Mitchel Humpherys Signed-off-by: Will Deacon --- include/linux/iopoll.h | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/linux/iopoll.h (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h new file mode 100644 index 000000000000..1c30014ed176 --- /dev/null +++ b/include/linux/iopoll.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_IOPOLL_H +#define _LINUX_IOPOLL_H + +#include +#include +#include +#include +#include +#include + +/** + * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.txt). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ +({ \ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + (val) = op(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + (val) = op(addr); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +/** + * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @delay_us: Time to udelay between reads in us (0 tight-loops). Should + * be less than ~10us since udelay is used (see + * Documentation/timers/timers-howto.txt). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ +({ \ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + for (;;) { \ + (val) = op(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + (val) = op(addr); \ + break; \ + } \ + if (delay_us) \ + udelay(delay_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + + +#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us) + +#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us) + +#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us) + +#define readw_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readw, addr, val, cond, delay_us, timeout_us) + +#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us) + +#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us) + +#define readq_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readq, addr, val, cond, delay_us, timeout_us) + +#define readq_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readq, addr, val, cond, delay_us, timeout_us) + +#define readb_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readb_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readb_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readb_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readw_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readw_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readw_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readw_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readl_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readl_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readq_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readq_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readq_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readq_relaxed, addr, val, cond, delay_us, timeout_us) + +#endif /* _LINUX_IOPOLL_H */ -- cgit v1.2.3 From d453cded05ee219b77815ea194dc36efa5398bca Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:04 +1030 Subject: module_arch_freeing_init(): new hook for archs before module->module_init freed. Archs have been abusing module_free() to clean up their arch-specific allocations. Since module_free() is also (ab)used by BPF and trace code, let's keep it to simple allocations, and provide a hook called before that. This means that avr32, ia64, parisc and s390 no longer need to implement their own module_free() at all. avr32 doesn't need module_finalize() either. Signed-off-by: Rusty Russell Cc: Chris Metcalf Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-kernel@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linux-s390@vger.kernel.org --- include/linux/moduleloader.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 7eeb9bbfb816..054eac853090 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -82,4 +82,6 @@ int module_finalize(const Elf_Ehdr *hdr, /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); +/* Any cleanup before freeing mod->module_init */ +void module_arch_freeing_init(struct module *mod); #endif -- cgit v1.2.3 From be1f221c0445a4157d177197c236f888d3581914 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:05 +1030 Subject: module: remove mod arg from module_free, rename module_memfree(). Nothing needs the module pointer any more, and the next patch will call it from RCU, where the module itself might no longer exist. Removing the arg is the safest approach. This just codifies the use of the module_alloc/module_free pattern which ftrace and bpf use. Signed-off-by: Rusty Russell Acked-by: Alexei Starovoitov Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Ralf Baechle Cc: Ley Foon Tan Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Steven Rostedt Cc: x86@kernel.org Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Masami Hiramatsu Cc: linux-cris-kernel@axis.com Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: nios2-dev@lists.rocketboards.org Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Cc: netdev@vger.kernel.org --- include/linux/moduleloader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 054eac853090..f7556261fe3c 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -26,7 +26,7 @@ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section); void *module_alloc(unsigned long size); /* Free memory returned from module_alloc. */ -void module_free(struct module *mod, void *module_region); +void module_memfree(void *module_region); /* * Apply the given relocation to the (simplified) ELF. Return -error -- cgit v1.2.3 From 67d0d04a762db4bd610fd628ad683b5d7dc905e7 Mon Sep 17 00:00:00 2001 From: Vincent Yang Date: Tue, 20 Jan 2015 16:05:16 +0800 Subject: mmc: sdhci: add a quirk for tuning work around This patch defines a quirk for tuning work around for some sdhci host controller. It sets both SDHCI_CTRL_EXEC_TUNING and SDHCI_CTRL_TUNED_CLK for tuning. It is a preparation and will be used by Fujitsu SDHCI controller f_sdh30 driver. Signed-off-by: Vincent Yang Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index cb8b94ff6a26..933b897ca095 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -111,6 +111,8 @@ struct sdhci_host { #define SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD (1<<10) /* Capability register bit-63 indicates HS400 support */ #define SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 (1<<11) +/* forced tuned clock */ +#define SDHCI_QUIRK2_TUNING_WORK_AROUND (1<<12) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From d3fc5d71ac4dfd28a66689cfd1eea84c4dba8bde Mon Sep 17 00:00:00 2001 From: Vincent Yang Date: Tue, 20 Jan 2015 16:05:17 +0800 Subject: mmc: sdhci: add a quirk for single block transactions This patch defines a quirk to disable the block count for single block transactions. It is a preparation and will be used by Fujitsu SDHCI controller f_sdh30 driver. Signed-off-by: Vincent Yang Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 933b897ca095..c3e3db196738 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -113,6 +113,8 @@ struct sdhci_host { #define SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 (1<<11) /* forced tuned clock */ #define SDHCI_QUIRK2_TUNING_WORK_AROUND (1<<12) +/* disable the block count for single block transactions */ +#define SDHCI_QUIRK2_SUPPORT_SINGLE (1<<13) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From 94b110aff8679b14f46fd6653ea87b42fe1555be Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:57:22 +0000 Subject: mmc: tmio: add tmio_mmc_host_alloc/free() Current tmio_mmc driver is using tmio_mmc_data for driver/platform specific data/callback, and it is needed for tmio_mmc_host_probe() function. Because of this style, include/linux/mfd/tmio.h header has tmio driver/framework specific data which is not needed from platform. This patch adds new tmio_mmc_host_alloc/free() as cleanup preparation. tmio driver specific data/callback will be implemented in tmio_mmc_host, and platform specific data/callback will be implemented in tmio_mmc_data in this cleanup. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 57388171610d..c7d9af042d09 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -135,7 +135,6 @@ struct tmio_mmc_data { unsigned long bus_shift; u32 ocr_mask; /* available voltages */ struct tmio_mmc_dma *dma; - struct device *dev; unsigned int cd_gpio; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); -- cgit v1.2.3 From 7ecc09bab1e856e6730a4dd8a3bc1c28bb6ab3be Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:57:33 +0000 Subject: mmc: tmio: tmio_mmc_host has .dma Current .dma is implemented under tmio_mmc_data. It goes to tmio_mmc_host by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index c7d9af042d09..8d708c7cf681 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -112,16 +112,6 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); struct dma_chan; -struct tmio_mmc_dma { - void *chan_priv_tx; - void *chan_priv_rx; - int slave_id_tx; - int slave_id_rx; - int alignment_shift; - dma_addr_t dma_rx_offset; - bool (*filter)(struct dma_chan *chan, void *arg); -}; - struct tmio_mmc_host; /* @@ -134,7 +124,6 @@ struct tmio_mmc_data { unsigned long flags; unsigned long bus_shift; u32 ocr_mask; /* available voltages */ - struct tmio_mmc_dma *dma; unsigned int cd_gpio; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); -- cgit v1.2.3 From dfe9a229e0a66b6a00439cea2885ad3b5d3e0840 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:57:42 +0000 Subject: mmc: tmio: tmio_mmc_host has .write16_hook Current .write16_hook is implemented under tmio_mmc_data. It goes to tmio_mmc_host by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8d708c7cf681..0872ca1e5dc2 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -112,8 +112,6 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); struct dma_chan; -struct tmio_mmc_host; - /* * data for the MMC controller */ @@ -127,7 +125,6 @@ struct tmio_mmc_data { unsigned int cd_gpio; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); - int (*write16_hook)(struct tmio_mmc_host *host, int addr); /* clock management callbacks */ int (*clk_enable)(struct platform_device *pdev, unsigned int *f); void (*clk_disable)(struct platform_device *pdev); -- cgit v1.2.3 From 4fe2ec57a15f98c232536cf04e7c139d830955d4 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:57:52 +0000 Subject: mmc: tmio: tmio_mmc_host has .clk_enable Current .clk_enable is implemented under tmio_mmc_data. It goes to tmio_mmc_host by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 0872ca1e5dc2..472587a02a42 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -126,7 +126,6 @@ struct tmio_mmc_data { void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); /* clock management callbacks */ - int (*clk_enable)(struct platform_device *pdev, unsigned int *f); void (*clk_disable)(struct platform_device *pdev); int (*multi_io_quirk)(struct mmc_card *card, unsigned int direction, int blk_size); -- cgit v1.2.3 From 00452c11ea0e4e5822edf0ac46853933860d0f53 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:58:01 +0000 Subject: mmc: tmio: tmio_mmc_host has .clk_disable Current .clk_disable is implemented under tmio_mmc_data. It goes to tmio_mmc_host by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 472587a02a42..a3f78da70af7 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -125,8 +125,7 @@ struct tmio_mmc_data { unsigned int cd_gpio; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); - /* clock management callbacks */ - void (*clk_disable)(struct platform_device *pdev); + int (*multi_io_quirk)(struct mmc_card *card, unsigned int direction, int blk_size); }; -- cgit v1.2.3 From 85c02ddd591e5252eb1cbe8743a839638d7415fd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:58:10 +0000 Subject: mmc: tmio: tmio_mmc_host has .multi_io_quirk Current .multi_io_quirk is implemented under tmio_mmc_data. It goes to tmio_mmc_host by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index a3f78da70af7..3edaa17d1ccd 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -125,9 +125,6 @@ struct tmio_mmc_data { unsigned int cd_gpio; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); - - int (*multi_io_quirk)(struct mmc_card *card, - unsigned int direction, int blk_size); }; /* -- cgit v1.2.3 From 7445bf9e6f4e5d7755e22c7c9b06f4ae0d6160c6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:58:20 +0000 Subject: mmc: tmio: tmio_mmc_host has .bus_shift Current .bus_shift is implemented under tmio_mmc_data. It goes to tmio_mmc_host by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 3edaa17d1ccd..7a5c27948315 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -120,7 +120,6 @@ struct tmio_mmc_data { unsigned long capabilities; unsigned long capabilities2; unsigned long flags; - unsigned long bus_shift; u32 ocr_mask; /* available voltages */ unsigned int cd_gpio; void (*set_pwr)(struct platform_device *host, int state); -- cgit v1.2.3 From e471df0bcaa137f1bbe7c5f75db6ce7566caa292 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:58:46 +0000 Subject: mmc: tmio: tmio_mmc_data has .alignment_shift Current .alignment_shift is implemented under tmio_mmc_dma. It goes to tmio_mmc_data by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 7a5c27948315..28a12d10e509 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -122,6 +122,7 @@ struct tmio_mmc_data { unsigned long flags; u32 ocr_mask; /* available voltages */ unsigned int cd_gpio; + int alignment_shift; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); }; -- cgit v1.2.3 From 8b4c8f32da91681c0dcd321c9e3cd14f866c5517 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:58:56 +0000 Subject: mmc: tmio: tmio_mmc_data has .dma_rx_offset Current .dma_rx_offset is implemented under tmio_mmc_dma. It goes to tmio_mmc_data by this patch. Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 28a12d10e509..807ecfb162d7 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -123,6 +123,7 @@ struct tmio_mmc_data { u32 ocr_mask; /* available voltages */ unsigned int cd_gpio; int alignment_shift; + dma_addr_t dma_rx_offset; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); }; -- cgit v1.2.3 From 010f4aa758f437647799b1fd677a5e2cf31714e9 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 04:59:24 +0000 Subject: mmc: sh_mobile_sdhi: remove .init/.cleanup No one is using .init/.cleanup callback function. Let's remove these. sdhi_ops and .cd_wakeup are also removed Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- include/linux/mmc/sh_mobile_sdhi.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h index 68927ae50845..da77e5e2041d 100644 --- a/include/linux/mmc/sh_mobile_sdhi.h +++ b/include/linux/mmc/sh_mobile_sdhi.h @@ -3,20 +3,10 @@ #include -struct platform_device; - #define SH_MOBILE_SDHI_IRQ_CARD_DETECT "card_detect" #define SH_MOBILE_SDHI_IRQ_SDCARD "sdcard" #define SH_MOBILE_SDHI_IRQ_SDIO "sdio" -/** - * struct sh_mobile_sdhi_ops - SDHI driver callbacks - * @cd_wakeup: trigger a card-detection run - */ -struct sh_mobile_sdhi_ops { - void (*cd_wakeup)(const struct platform_device *pdev); -}; - struct sh_mobile_sdhi_info { int dma_slave_tx; int dma_slave_rx; @@ -25,11 +15,6 @@ struct sh_mobile_sdhi_info { unsigned long tmio_caps2; u32 tmio_ocr_mask; /* available MMC voltages */ unsigned int cd_gpio; - - /* callbacks for board specific setup code */ - int (*init)(struct platform_device *pdev, - const struct sh_mobile_sdhi_ops *ops); - void (*cleanup)(struct platform_device *pdev); }; #endif /* LINUX_MMC_SH_MOBILE_SDHI_H */ -- cgit v1.2.3 From de122cb1745313f331dc7c7923b484343d455e64 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 13 Jan 2015 05:00:39 +0000 Subject: mmc: tmio: remove TMIO_MMC_HAVE_CTL_DMA_REG flag tmio_mmc_host has .enable_dma callback now. We don't need TMIO_MMC_HAVE_CTL_DMA_REG anymore. Let's remove it Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 807ecfb162d7..605812820e48 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -95,11 +95,6 @@ */ #define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) -/* - * Some controllers have DMA enable/disable register - */ -#define TMIO_MMC_HAVE_CTL_DMA_REG (1 << 9) - /* * Some controllers allows to set SDx actual clock */ -- cgit v1.2.3 From 1dfb4a0d7615811ec4a61b0a7631c8ddc0baf335 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 13 Jan 2015 08:00:29 +0100 Subject: gpio: stmpe: enforce device tree only mode Require that device tree be used with STMPE (all platforms use this) and enforce OF_GPIO, then delete the platform data. Signed-off-by: Linus Walleij --- include/linux/mfd/stmpe.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index f742b6717d52..c9d869027300 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -117,20 +117,6 @@ extern int stmpe_disable(struct stmpe *stmpe, unsigned int blocks); #define STMPE_GPIO_NOREQ_811_TOUCH (0xf0) -/** - * struct stmpe_gpio_platform_data - STMPE GPIO platform data - * @norequest_mask: bitmask specifying which GPIOs should _not_ be - * requestable due to different usage (e.g. touch, keypad) - * STMPE_GPIO_NOREQ_* macros can be used here. - * @setup: board specific setup callback. - * @remove: board specific remove callback - */ -struct stmpe_gpio_platform_data { - unsigned norequest_mask; - void (*setup)(struct stmpe *stmpe, unsigned gpio_base); - void (*remove)(struct stmpe *stmpe, unsigned gpio_base); -}; - /** * struct stmpe_ts_platform_data - stmpe811 touch screen controller platform * data @@ -182,7 +168,6 @@ struct stmpe_ts_platform_data { * @irq_over_gpio: true if gpio is used to get irq * @irq_gpio: gpio number over which irq will be requested (significant only if * irq_over_gpio is true) - * @gpio: GPIO-specific platform data * @ts: touchscreen-specific platform data */ struct stmpe_platform_data { @@ -194,7 +179,6 @@ struct stmpe_platform_data { int irq_gpio; int autosleep_timeout; - struct stmpe_gpio_platform_data *gpio; struct stmpe_ts_platform_data *ts; }; -- cgit v1.2.3 From 4b6eade76ad19183464b739e9af1efacdb1bbda8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 20 Jan 2015 11:00:53 +0100 Subject: mfd: max77693: Add defines for MAX77693 charger driver Prepare for adding support for Maxim 77693 charger by adding necessary new defines. Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Signed-off-by: Sebastian Reichel --- include/linux/mfd/max77693-private.h | 108 +++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 08dae01258b9..955dd990beaf 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -143,10 +143,118 @@ enum max77693_pmic_reg { #define FLASH_INT_FLED1_SHORT BIT(3) #define FLASH_INT_OVER_CURRENT BIT(4) +/* Fast charge timer in in hours */ +#define DEFAULT_FAST_CHARGE_TIMER 4 +/* microamps */ +#define DEFAULT_TOP_OFF_THRESHOLD_CURRENT 150000 +/* minutes */ +#define DEFAULT_TOP_OFF_TIMER 30 +/* microvolts */ +#define DEFAULT_CONSTANT_VOLT 4200000 +/* microvolts */ +#define DEFAULT_MIN_SYSTEM_VOLT 3600000 +/* celsius */ +#define DEFAULT_THERMAL_REGULATION_TEMP 100 +/* microamps */ +#define DEFAULT_BATTERY_OVERCURRENT 3500000 +/* microvolts */ +#define DEFAULT_CHARGER_INPUT_THRESHOLD_VOLT 4300000 + +/* MAX77693_CHG_REG_CHG_INT_OK register */ +#define CHG_INT_OK_BYP_SHIFT 0 +#define CHG_INT_OK_BAT_SHIFT 3 +#define CHG_INT_OK_CHG_SHIFT 4 +#define CHG_INT_OK_CHGIN_SHIFT 6 +#define CHG_INT_OK_DETBAT_SHIFT 7 +#define CHG_INT_OK_BYP_MASK BIT(CHG_INT_OK_BYP_SHIFT) +#define CHG_INT_OK_BAT_MASK BIT(CHG_INT_OK_BAT_SHIFT) +#define CHG_INT_OK_CHG_MASK BIT(CHG_INT_OK_CHG_SHIFT) +#define CHG_INT_OK_CHGIN_MASK BIT(CHG_INT_OK_CHGIN_SHIFT) +#define CHG_INT_OK_DETBAT_MASK BIT(CHG_INT_OK_DETBAT_SHIFT) + +/* MAX77693_CHG_REG_CHG_DETAILS_00 register */ +#define CHG_DETAILS_00_CHGIN_SHIFT 5 +#define CHG_DETAILS_00_CHGIN_MASK (0x3 << CHG_DETAILS_00_CHGIN_SHIFT) + +/* MAX77693_CHG_REG_CHG_DETAILS_01 register */ +#define CHG_DETAILS_01_CHG_SHIFT 0 +#define CHG_DETAILS_01_BAT_SHIFT 4 +#define CHG_DETAILS_01_TREG_SHIFT 7 +#define CHG_DETAILS_01_CHG_MASK (0xf << CHG_DETAILS_01_CHG_SHIFT) +#define CHG_DETAILS_01_BAT_MASK (0x7 << CHG_DETAILS_01_BAT_SHIFT) +#define CHG_DETAILS_01_TREG_MASK BIT(7) + +/* MAX77693_CHG_REG_CHG_DETAILS_01/CHG field */ +enum max77693_charger_charging_state { + MAX77693_CHARGING_PREQUALIFICATION = 0x0, + MAX77693_CHARGING_FAST_CONST_CURRENT, + MAX77693_CHARGING_FAST_CONST_VOLTAGE, + MAX77693_CHARGING_TOP_OFF, + MAX77693_CHARGING_DONE, + MAX77693_CHARGING_HIGH_TEMP, + MAX77693_CHARGING_TIMER_EXPIRED, + MAX77693_CHARGING_THERMISTOR_SUSPEND, + MAX77693_CHARGING_OFF, + MAX77693_CHARGING_RESERVED, + MAX77693_CHARGING_OVER_TEMP, + MAX77693_CHARGING_WATCHDOG_EXPIRED, +}; + +/* MAX77693_CHG_REG_CHG_DETAILS_01/BAT field */ +enum max77693_charger_battery_state { + MAX77693_BATTERY_NOBAT = 0x0, + /* Dead-battery or low-battery prequalification */ + MAX77693_BATTERY_PREQUALIFICATION, + MAX77693_BATTERY_TIMER_EXPIRED, + MAX77693_BATTERY_GOOD, + MAX77693_BATTERY_LOWVOLTAGE, + MAX77693_BATTERY_OVERVOLTAGE, + MAX77693_BATTERY_OVERCURRENT, + MAX77693_BATTERY_RESERVED, +}; + +/* MAX77693_CHG_REG_CHG_DETAILS_02 register */ +#define CHG_DETAILS_02_BYP_SHIFT 0 +#define CHG_DETAILS_02_BYP_MASK (0xf << CHG_DETAILS_02_BYP_SHIFT) + /* MAX77693 CHG_CNFG_00 register */ #define CHG_CNFG_00_CHG_MASK 0x1 #define CHG_CNFG_00_BUCK_MASK 0x4 +/* MAX77693_CHG_REG_CHG_CNFG_01 register */ +#define CHG_CNFG_01_FCHGTIME_SHIFT 0 +#define CHG_CNFG_01_CHGRSTRT_SHIFT 4 +#define CHG_CNFG_01_PQEN_SHIFT 7 +#define CHG_CNFG_01_FCHGTIME_MASK (0x7 << CHG_CNFG_01_FCHGTIME_SHIFT) +#define CHG_CNFG_01_CHGRSTRT_MASK (0x3 << CHG_CNFG_01_CHGRSTRT_SHIFT) +#define CHG_CNFG_01_PQEN_MAKS BIT(CHG_CNFG_01_PQEN_SHIFT) + +/* MAX77693_CHG_REG_CHG_CNFG_03 register */ +#define CHG_CNFG_03_TOITH_SHIFT 0 +#define CHG_CNFG_03_TOTIME_SHIFT 3 +#define CHG_CNFG_03_TOITH_MASK (0x7 << CHG_CNFG_03_TOITH_SHIFT) +#define CHG_CNFG_03_TOTIME_MASK (0x7 << CHG_CNFG_03_TOTIME_SHIFT) + +/* MAX77693_CHG_REG_CHG_CNFG_04 register */ +#define CHG_CNFG_04_CHGCVPRM_SHIFT 0 +#define CHG_CNFG_04_MINVSYS_SHIFT 5 +#define CHG_CNFG_04_CHGCVPRM_MASK (0x1f << CHG_CNFG_04_CHGCVPRM_SHIFT) +#define CHG_CNFG_04_MINVSYS_MASK (0x7 << CHG_CNFG_04_MINVSYS_SHIFT) + +/* MAX77693_CHG_REG_CHG_CNFG_06 register */ +#define CHG_CNFG_06_CHGPROT_SHIFT 2 +#define CHG_CNFG_06_CHGPROT_MASK (0x3 << CHG_CNFG_06_CHGPROT_SHIFT) + +/* MAX77693_CHG_REG_CHG_CNFG_07 register */ +#define CHG_CNFG_07_REGTEMP_SHIFT 5 +#define CHG_CNFG_07_REGTEMP_MASK (0x3 << CHG_CNFG_07_REGTEMP_SHIFT) + +/* MAX77693_CHG_REG_CHG_CNFG_12 register */ +#define CHG_CNFG_12_B2SOVRC_SHIFT 0 +#define CHG_CNFG_12_VCHGINREG_SHIFT 3 +#define CHG_CNFG_12_B2SOVRC_MASK (0x7 << CHG_CNFG_12_B2SOVRC_SHIFT) +#define CHG_CNFG_12_VCHGINREG_MASK (0x3 << CHG_CNFG_12_VCHGINREG_SHIFT) + /* MAX77693 CHG_CNFG_09 Register */ #define CHG_CNFG_09_CHGIN_ILIM_MASK 0x7F -- cgit v1.2.3 From 2fded7f44b8fcf79e274c3f0cfbd0298f95308f3 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 23 Dec 2014 16:39:54 -0500 Subject: audit: remove vestiges of vers_ops Should have been removed with commit 18900909 ("audit: remove the old depricated kernel interface"). Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 93331929d643..b481779a8dee 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -46,7 +46,6 @@ struct audit_tree; struct sk_buff; struct audit_krule { - int vers_ops; u32 pflags; u32 flags; u32 listnr; -- cgit v1.2.3 From 39e794bff718188cfb9ace2032cbe4fd86048dc6 Mon Sep 17 00:00:00 2001 From: Yaniv Gardi Date: Thu, 15 Jan 2015 16:32:36 +0200 Subject: phy: qcom-ufs: add support for 20nm phy This change adds a support for a 20nm qcom-ufs phy that is required in platforms that use ufs-qcom controller. Signed-off-by: Yaniv Gardi Reviewed-by: Dov Levenglick Signed-off-by: Christoph Hellwig --- include/linux/phy/phy-qcom-ufs.h | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/linux/phy/phy-qcom-ufs.h (limited to 'include/linux') diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h new file mode 100644 index 000000000000..9d18e9f948e9 --- /dev/null +++ b/include/linux/phy/phy-qcom-ufs.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015, Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef PHY_QCOM_UFS_H_ +#define PHY_QCOM_UFS_H_ + +#include "phy.h" + +/** + * ufs_qcom_phy_enable_ref_clk() - Enable the phy + * ref clock. + * @phy: reference to a generic phy + * + * returns 0 for success, and non-zero for error. + */ +int ufs_qcom_phy_enable_ref_clk(struct phy *phy); + +/** + * ufs_qcom_phy_disable_ref_clk() - Disable the phy + * ref clock. + * @phy: reference to a generic phy. + */ +void ufs_qcom_phy_disable_ref_clk(struct phy *phy); + +/** + * ufs_qcom_phy_enable_dev_ref_clk() - Enable the device + * ref clock. + * @phy: reference to a generic phy. + */ +void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy); + +/** + * ufs_qcom_phy_disable_dev_ref_clk() - Disable the device + * ref clock. + * @phy: reference to a generic phy. + */ +void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy); + +int ufs_qcom_phy_enable_iface_clk(struct phy *phy); +void ufs_qcom_phy_disable_iface_clk(struct phy *phy); +int ufs_qcom_phy_start_serdes(struct phy *phy); +int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes); +int ufs_qcom_phy_calibrate_phy(struct phy *phy, bool is_rate_B); +int ufs_qcom_phy_is_pcs_ready(struct phy *phy); +void ufs_qcom_phy_save_controller_version(struct phy *phy, + u8 major, u16 minor, u16 step); + +#endif /* PHY_QCOM_UFS_H_ */ -- cgit v1.2.3 From 3c33f5b99d688deafd21d4a770303691c7c3a320 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Jan 2015 09:26:19 -0600 Subject: livepatch: support for repatching a function Add support for patching a function multiple times. If multiple patches affect a function, the function in the most recently enabled patch "wins". This enables a cumulative patch upgrade path, where each patch is a superset of previous patches. This requires restructuring the data a little bit. With the current design, where each klp_func struct has its own ftrace_ops, we'd have to unregister the old ops and then register the new ops, because FTRACE_OPS_FL_IPMODIFY prevents us from having two ops registered for the same function at the same time. That would leave a regression window where the function isn't patched at all (not good for a patch upgrade path). This patch replaces the per-klp_func ftrace_ops with a global klp_ops list, with one ftrace_ops per original function. A single ftrace_ops is shared between all klp_funcs which have the same old_addr. This allows the switch between function versions to happen instantaneously by updating the klp_ops struct's func_stack list. The winner is the klp_func at the top of the func_stack (front of the list). [ jkosina@suse.cz: turn WARN_ON() into WARN_ON_ONCE() in ftrace handler to avoid storm in pathological cases ] Signed-off-by: Josh Poimboeuf Reviewed-by: Jiri Slaby Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 950bc615842f..f14c6fb262b4 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -40,8 +40,8 @@ enum klp_state { * @old_addr: a hint conveying at what address the old function * can be found (optional, vmlinux patches only) * @kobj: kobject for sysfs resources - * @fops: ftrace operations structure * @state: tracks function-level patch application state + * @stack_node: list node for klp_ops func_stack list */ struct klp_func { /* external */ @@ -59,8 +59,8 @@ struct klp_func { /* internal */ struct kobject kobj; - struct ftrace_ops *fops; enum klp_state state; + struct list_head stack_node; }; /** -- cgit v1.2.3 From 97b713ba3ebaa6c8d84c2c720f5468a7c6a6eb4e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:31 +0100 Subject: fs: kill BDI_CAP_SWAP_BACKED This bdi flag isn't too useful - we can determine that a vma is backed by either swap or shmem trivially in the caller. This also allows removing the backing_dev_info instaces for swap and shmem in favor of noop_backing_dev_info. Signed-off-by: Christoph Hellwig Reviewed-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5da6012b7a14..e936cea856dc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -238,8 +238,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_WRITE_MAP: Can be mapped for writing * BDI_CAP_EXEC_MAP: Can be mapped for execution * - * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed. - * * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 @@ -250,7 +248,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_WRITE_MAP 0x00000020 #define BDI_CAP_EXEC_MAP 0x00000040 #define BDI_CAP_NO_ACCT_WB 0x00000080 -#define BDI_CAP_SWAP_BACKED 0x00000100 #define BDI_CAP_STABLE_WRITES 0x00000200 #define BDI_CAP_STRICTLIMIT 0x00000400 @@ -329,11 +326,6 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) BDI_CAP_NO_WRITEBACK)); } -static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_SWAP_BACKED; -} - static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(mapping->backing_dev_info); @@ -344,11 +336,6 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping) return bdi_cap_account_dirty(mapping->backing_dev_info); } -static inline bool mapping_cap_swap_backed(struct address_space *mapping) -{ - return bdi_cap_swap_backed(mapping->backing_dev_info); -} - static inline int bdi_sched_wait(void *word) { schedule(); -- cgit v1.2.3 From b4caecd48005fbed3949dde6c1cb233142fd69e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:32 +0100 Subject: fs: introduce f_op->mmap_capabilities for nommu mmap support Since "BDI: Provide backing device capability information [try #3]" the backing_dev_info structure also provides flags for the kind of mmap operation available in a nommu environment, which is entirely unrelated to it's original purpose. Introduce a new nommu-only file operation to provide this information to the nommu mmap code instead. Splitting this from the backing_dev_info structure allows to remove lots of backing_dev_info instance that aren't otherwise needed, and entirely gets rid of the concept of providing a backing_dev_info for a character device. It also removes the need for the mtd_inodefs filesystem. Signed-off-by: Christoph Hellwig Reviewed-by: Tejun Heo Acked-by: Brian Norris Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 33 ++++----------------------------- include/linux/cdev.h | 2 -- include/linux/fs.h | 23 +++++++++++++++++++++++ include/linux/mtd/mtd.h | 2 ++ 4 files changed, 29 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e936cea856dc..478f95d92d73 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -114,7 +114,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); -int __must_check bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); +int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); @@ -228,42 +228,17 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_NO_ACCT_DIRTY: Dirty pages shouldn't contribute to accounting * BDI_CAP_NO_WRITEBACK: Don't write pages back * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages - * - * These flags let !MMU mmap() govern direct device mapping vs immediate - * copying more easily for MAP_PRIVATE, especially for ROM filesystems. - * - * BDI_CAP_MAP_COPY: Copy can be mapped (MAP_PRIVATE) - * BDI_CAP_MAP_DIRECT: Can be mapped directly (MAP_SHARED) - * BDI_CAP_READ_MAP: Can be mapped for reading - * BDI_CAP_WRITE_MAP: Can be mapped for writing - * BDI_CAP_EXEC_MAP: Can be mapped for execution - * * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 -#define BDI_CAP_MAP_COPY 0x00000004 -#define BDI_CAP_MAP_DIRECT 0x00000008 -#define BDI_CAP_READ_MAP 0x00000010 -#define BDI_CAP_WRITE_MAP 0x00000020 -#define BDI_CAP_EXEC_MAP 0x00000040 -#define BDI_CAP_NO_ACCT_WB 0x00000080 -#define BDI_CAP_STABLE_WRITES 0x00000200 -#define BDI_CAP_STRICTLIMIT 0x00000400 - -#define BDI_CAP_VMFLAGS \ - (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) +#define BDI_CAP_NO_ACCT_WB 0x00000004 +#define BDI_CAP_STABLE_WRITES 0x00000008 +#define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) -#if defined(VM_MAYREAD) && \ - (BDI_CAP_READ_MAP != VM_MAYREAD || \ - BDI_CAP_WRITE_MAP != VM_MAYWRITE || \ - BDI_CAP_EXEC_MAP != VM_MAYEXEC) -#error please change backing_dev_info::capabilities flags -#endif - extern struct backing_dev_info default_backing_dev_info; extern struct backing_dev_info noop_backing_dev_info; diff --git a/include/linux/cdev.h b/include/linux/cdev.h index fb4591977b03..f8763615a5f2 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -30,6 +30,4 @@ void cdev_del(struct cdev *); void cd_forget(struct inode *); -extern struct backing_dev_info directly_mappable_cdev_bdi; - #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 42efe13077b6..1dada399aa23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1502,6 +1502,26 @@ struct block_device_operations; #define HAVE_COMPAT_IOCTL 1 #define HAVE_UNLOCKED_IOCTL 1 +/* + * These flags let !MMU mmap() govern direct device mapping vs immediate + * copying more easily for MAP_PRIVATE, especially for ROM filesystems. + * + * NOMMU_MAP_COPY: Copy can be mapped (MAP_PRIVATE) + * NOMMU_MAP_DIRECT: Can be mapped directly (MAP_SHARED) + * NOMMU_MAP_READ: Can be mapped for reading + * NOMMU_MAP_WRITE: Can be mapped for writing + * NOMMU_MAP_EXEC: Can be mapped for execution + */ +#define NOMMU_MAP_COPY 0x00000001 +#define NOMMU_MAP_DIRECT 0x00000008 +#define NOMMU_MAP_READ VM_MAYREAD +#define NOMMU_MAP_WRITE VM_MAYWRITE +#define NOMMU_MAP_EXEC VM_MAYEXEC + +#define NOMMU_VMFLAGS \ + (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) + + struct iov_iter; struct file_operations { @@ -1536,6 +1556,9 @@ struct file_operations { long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); +#ifndef CONFIG_MMU + unsigned (*mmap_capabilities)(struct file *); +#endif }; struct inode_operations { diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 031ff3a9a0bd..3301c4c289d6 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -408,4 +408,6 @@ static inline int mtd_is_bitflip_or_eccerr(int err) { return mtd_is_bitflip(err) || mtd_is_eccerr(err); } +unsigned mtd_mmap_capabilities(struct mtd_info *mtd); + #endif /* __MTD_MTD_H__ */ -- cgit v1.2.3 From de1414a654e66b81b5348dbc5259ecf2fb61655e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:36 +0100 Subject: fs: export inode_to_bdi and use it in favor of mapping->backing_dev_info Now that we got rid of the bdi abuse on character devices we can always use sb->s_bdi to get at the backing_dev_info for a file, except for the block device special case. Export inode_to_bdi and replace uses of mapping->backing_dev_info with it to prepare for the removal of mapping->backing_dev_info. Signed-off-by: Christoph Hellwig Reviewed-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 478f95d92d73..ed59dee03a71 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,6 +106,8 @@ struct backing_dev_info { #endif }; +struct backing_dev_info *inode_to_bdi(struct inode *inode); + int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -303,12 +305,12 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { - return bdi_cap_writeback_dirty(mapping->backing_dev_info); + return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host)); } static inline bool mapping_cap_account_dirty(struct address_space *mapping) { - return bdi_cap_account_dirty(mapping->backing_dev_info); + return bdi_cap_account_dirty(inode_to_bdi(mapping->host)); } static inline int bdi_sched_wait(void *word) -- cgit v1.2.3 From b83ae6d421435c6204150300f1c25bfbd39cd62b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:37 +0100 Subject: fs: remove mapping->backing_dev_info Now that we never use the backing_dev_info pointer in struct address_space we can simply remove it and save 4 to 8 bytes in every inode. Signed-off-by: Christoph Hellwig Acked-by: Ryusuke Konishi Reviewed-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1dada399aa23..65d02de342e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -34,6 +34,7 @@ #include #include +struct backing_dev_info; struct export_operations; struct hd_geometry; struct iovec; @@ -394,7 +395,6 @@ int pagecache_write_end(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); -struct backing_dev_info; struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ @@ -409,7 +409,6 @@ struct address_space { pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ unsigned long flags; /* error bits/gfp mask */ - struct backing_dev_info *backing_dev_info; /* device readahead, etc */ spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ -- cgit v1.2.3 From df0ce26cb4ee8bc233d50213b97213532aff0a3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:41 +0100 Subject: fs: remove default_backing_dev_info Now that default_backing_dev_info is not used for writeback purposes we can git rid of it easily: - instead of using it's name for tracing unregistered bdi we just use "unknown" - btrfs and ceph can just assign the default read ahead window themselves like several other filesystems already do. - we can assign noop_backing_dev_info as the default one in alloc_super. All filesystems already either assigned their own or noop_backing_dev_info. Signed-off-by: Christoph Hellwig Reviewed-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ed59dee03a71..d94077fea1f8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -241,7 +241,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) -extern struct backing_dev_info default_backing_dev_info; extern struct backing_dev_info noop_backing_dev_info; int writeback_in_progress(struct backing_dev_info *bdi); -- cgit v1.2.3 From 85cdf36e11557dc367c1361e4b7bb2c4619cae91 Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Thu, 1 Jan 2015 18:04:52 +0100 Subject: power: ab8500_fg.c: Remove unused function Remove the function ab8500_fg_reinit() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist Acked-by: Arnd Bergmann Signed-off-by: Sebastian Reichel --- include/linux/mfd/abx500/ab8500-bm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h index cc892a8d8d6e..12a5b396921e 100644 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ b/include/linux/mfd/abx500/ab8500-bm.h @@ -461,7 +461,6 @@ struct ab8500_fg; #ifdef CONFIG_AB8500_BM extern struct abx500_bm_data ab8500_bm_data; -void ab8500_fg_reinit(void); void ab8500_charger_usb_state_changed(u8 bm_usb_state, u16 mA); struct ab8500_btemp *ab8500_btemp_get(void); int ab8500_btemp_get_batctrl_temp(struct ab8500_btemp *btemp); -- cgit v1.2.3 From 9c45101e88b2bf2ce36b8833fcfa784a9149aa74 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Nov 2014 09:21:58 +0100 Subject: quota: Cleanup flags definitions Currently all quota flags were defined just in kernel-private headers. Export flags readable / writeable from userspace to userspace via include/uapi/linux/quota.h. Signed-off-by: Jan Kara --- include/linux/dqblk_v1.h | 3 --- include/linux/quota.h | 14 ++++++++------ 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h index 3713a7232dd8..c0d4d1e2a45c 100644 --- a/include/linux/dqblk_v1.h +++ b/include/linux/dqblk_v1.h @@ -5,9 +5,6 @@ #ifndef _LINUX_DQBLK_V1_H #define _LINUX_DQBLK_V1_H -/* Root squash turned on */ -#define V1_DQF_RSQUASH 1 - /* Numbers of blocks needed for updates */ #define V1_INIT_ALLOC 1 #define V1_INIT_REWRITE 1 diff --git a/include/linux/quota.h b/include/linux/quota.h index 50978b781a19..0c42113607ce 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -223,12 +223,14 @@ struct mem_dqinfo { struct super_block; -#define DQF_MASK 0xffff /* Mask for format specific flags */ -#define DQF_GETINFO_MASK 0x1ffff /* Mask for flags passed to userspace */ -#define DQF_SETINFO_MASK 0xffff /* Mask for flags modifiable from userspace */ -#define DQF_SYS_FILE_B 16 -#define DQF_SYS_FILE (1 << DQF_SYS_FILE_B) /* Quota file stored as system file */ -#define DQF_INFO_DIRTY_B 31 +/* Mask for flags passed to userspace */ +#define DQF_GETINFO_MASK (DQF_ROOT_SQUASH | DQF_SYS_FILE) +/* Mask for flags modifiable from userspace */ +#define DQF_SETINFO_MASK DQF_ROOT_SQUASH + +enum { + DQF_INFO_DIRTY_B = DQF_PRIVATE, +}; #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ extern void mark_info_dirty(struct super_block *sb, int type); -- cgit v1.2.3 From c1155c64e603378dccfc21ee0612cf60dd11725b Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Fri, 19 Dec 2014 17:55:13 +0900 Subject: power: charger-manager: Use alarmtimer for battery monitoring in suspend. To guerantee proper charing and managing batteries even in suspend, charger-manager has used rtc device with rtc framework interface. However, it is better to use alarmtimer for cleaner and more appropriate operation. This patch makes driver to use alarmtimer for polling work in suspend and removes all deprecated codes related with using rtc interface. Signed-off-by: Jonghwa Lee Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index e97fc656a058..416ebeb6ee1e 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -17,6 +17,7 @@ #include #include +#include enum data_source { CM_BATTERY_PRESENT, @@ -44,29 +45,6 @@ enum cm_event_types { CM_EVENT_OTHERS, }; -/** - * struct charger_global_desc - * @rtc_name: the name of RTC used to wake up the system from suspend. - * @rtc_only_wakeup: - * If the system is woken up by waekup-sources other than the RTC or - * callbacks, Charger Manager should recognize with - * rtc_only_wakeup() returning false. - * If the RTC given to CM is the only wakeup reason, - * rtc_only_wakeup should return true. - * @assume_timer_stops_in_suspend: - * Assume that the jiffy timer stops in suspend-to-RAM. - * When enabled, CM does not rely on jiffies value in - * suspend_again and assumes that jiffies value does not - * change during suspend. - */ -struct charger_global_desc { - char *rtc_name; - - bool (*rtc_only_wakeup)(void); - - bool assume_timer_stops_in_suspend; -}; - /** * struct charger_cable * @extcon_name: the name of extcon device. @@ -266,22 +244,14 @@ struct charger_manager { char psy_name_buf[PSY_NAME_MAX + 1]; struct power_supply charger_psy; - bool status_save_ext_pwr_inserted; - bool status_save_batt; - u64 charging_start_time; u64 charging_end_time; }; #ifdef CONFIG_CHARGER_MANAGER -extern int setup_charger_manager(struct charger_global_desc *gd); -extern bool cm_suspend_again(void); extern void cm_notify_event(struct power_supply *psy, enum cm_event_types type, char *msg); #else -static inline int setup_charger_manager(struct charger_global_desc *gd) -{ return 0; } -static inline bool cm_suspend_again(void) { return false; } static inline void cm_notify_event(struct power_supply *psy, enum cm_event_types type, char *msg) { } #endif -- cgit v1.2.3 From d5db139ab3764640e0882a1746e7b9fdee33fd87 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Jan 2015 11:13:14 +1030 Subject: module: make module_refcount() a signed integer. James Bottomley points out that it will be -1 during unload. It's only used for diagnostics, so let's not hide that as it could be a clue as to what's gone wrong. Cc: Jason Wessel Acked-and-documention-added-by: James Bottomley Reviewed-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index ebfb0e153c6a..b653d7c0a05a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -444,7 +444,7 @@ extern void __module_put_and_exit(struct module *mod, long code) #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD -unsigned long module_refcount(struct module *mod); +int module_refcount(struct module *mod); void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(VMLINUX_SYMBOL_STR(x)) void symbol_put_addr(void *addr); -- cgit v1.2.3 From 8116bf4cb62d337c953cfa5369ef4cf83e73140c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 21 Jan 2015 20:44:01 -0500 Subject: locks: update comments that refer to inode->i_flock Signed-off-by: Jeff Layton --- include/linux/fs.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f87cb2f03103..ddd2fa7cefd3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -925,12 +925,11 @@ int locks_in_grace(struct net *); * FIXME: should we create a separate "struct lock_request" to help distinguish * these two uses? * - * The i_flock list is ordered by: + * The varous i_flctx lists are ordered by: * - * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX - * 2) lock owner - * 3) lock range start - * 4) lock range end + * 1) lock owner + * 2) lock range start + * 3) lock range end * * Obviously, the last two criteria only matter for POSIX locks. */ @@ -1992,8 +1991,9 @@ static inline int break_lease(struct inode *inode, unsigned int mode) { /* * Since this check is lockless, we must ensure that any refcounts - * taken are done before checking inode->i_flock. Otherwise, we could - * end up racing with tasks trying to set a new lease on this file. + * taken are done before checking i_flctx->flc_lease. Otherwise, we + * could end up racing with tasks trying to set a new lease on this + * file. */ smp_mb(); if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) @@ -2005,8 +2005,9 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) { /* * Since this check is lockless, we must ensure that any refcounts - * taken are done before checking inode->i_flock. Otherwise, we could - * end up racing with tasks trying to set a new lease on this file. + * taken are done before checking i_flctx->flc_lease. Otherwise, we + * could end up racing with tasks trying to set a new lease on this + * file. */ smp_mb(); if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) -- cgit v1.2.3 From 30b8b0066cafef274fc92462578ee346211ce7cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Jan 2015 21:22:39 +0000 Subject: init: Get rid of x86isms The UP local API support can be set up from an early initcall. No need for horrible hackery in the init code. Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Joerg Roedel Cc: Tony Luck Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20150115211703.827943883@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/smp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 93dff5fff524..be91db2a7017 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -151,6 +151,13 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } static inline void wake_up_all_idle_cpus(void) { } +#ifdef CONFIG_UP_LATE_INIT +extern void __init up_late_init(void); +static inline void smp_init(void) { up_late_init(); } +#else +static inline void smp_init(void) { } +#endif + #endif /* !SMP */ /* -- cgit v1.2.3 From 1f94a94f67e1083e19fb7b436dd7ca7a4ba03f2b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 9 Jan 2015 20:34:39 -0600 Subject: PCI: Add generic config accessors Many PCI controllers' configuration space accesses are memory-mapped and vary only in address calculation and access checks. There are 2 main access methods: a decoded address space such as ECAM or a single address and data register similar to x86. This implementation can support both cases as well as be used in cases that need additional pre- or post-access handling. Add a new pci_ops member, map_bus, which can do access checks and any necessary setup. It returns the address to use for the configuration space access. The access types supported are 32-bit only accesses or correct byte, word, or dword sized accesses. Tested-by: Thierry Reding Signed-off-by: Rob Herring Signed-off-by: Bjorn Helgaas Reviewed-by: Thierry Reding --- include/linux/pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 360a966a97a5..e7fd51900182 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -560,6 +560,7 @@ static inline int pcibios_err_to_errno(int err) /* Low-level architecture-dependent routines */ struct pci_ops { + void __iomem *(*map_bus)(struct pci_bus *bus, unsigned int devfn, int where); int (*read)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val); int (*write)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val); }; @@ -857,6 +858,16 @@ int pci_bus_write_config_word(struct pci_bus *bus, unsigned int devfn, int where, u16 val); int pci_bus_write_config_dword(struct pci_bus *bus, unsigned int devfn, int where, u32 val); + +int pci_generic_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val); +int pci_generic_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val); +int pci_generic_config_read32(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val); +int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val); + struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops); static inline int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val) -- cgit v1.2.3 From 41fbf3b39d5eca01527338b4d0ee15ee1ae1023c Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 17 Dec 2014 13:11:35 +0800 Subject: ktime.h: Introduce ktime_ms_delta This patch adds a reusable time difference function which returns the difference in millisecond, as often used in some driver code, e.g. mtd/test, media/rc, etc. Signed-off-by: Chunyan Zhang Acked-by: Arnd Bergmann Cc: zhang.lyra@gmail.com Cc: davem@davemloft.net Cc: john.stultz@linaro.org Cc: dborkman@redhat.com Link: http://lkml.kernel.org/r/1418793095-18780-1-git-send-email-zhang.chunyan@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index c9d645ad98ff..891ea92a68b0 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -186,6 +186,11 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) return ktime_to_us(ktime_sub(later, earlier)); } +static inline s64 ktime_ms_delta(const ktime_t later, const ktime_t earlier) +{ + return ktime_to_ms(ktime_sub(later, earlier)); +} + static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) { return ktime_add_ns(kt, usec * NSEC_PER_USEC); -- cgit v1.2.3 From 9bc7491906b4113b4c5ae442157c7dfc4e10cd14 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jan 2015 21:24:10 +0100 Subject: hrtimer: Prevent stale expiry time in hrtimer_interrupt() hrtimer_interrupt() has the following subtle issue: hrtimer_interrupt() lock(cpu_base); expires_next = KTIME_MAX; expire_timers(CLOCK_MONOTONIC); expires = get_next_timer(CLOCK_MONOTONIC); if (expires < expires_next) expires_next = expires; expire_timers(CLOCK_REALTIME); unlock(cpu_base); wakeup() hrtimer_start(CLOCK_MONOTONIC, newtimer); lock(cpu_base(); expires = get_next_timer(CLOCK_REALTIME); if (expires < expires_next) expires_next = expires; So because we already evaluated the next expiring timer of CLOCK_MONOTONIC we ignore that the expiry time of newtimer might be earlier than the overall next expiry time in hrtimer_interrupt(). To solve this, remove the caching of the next expiry value from hrtimer_interrupt() and reevaluate all active clock bases for the next expiry value. To avoid another code duplication, create a shared evaluation function and use it for hrtimer_get_next_event(), hrtimer_force_reprogram() and hrtimer_interrupt(). There is another subtlety in this mechanism: While hrtimer_interrupt() is running, we want to avoid to touch the hardware device because we will reprogram it anyway at the end of hrtimer_interrupt(). This works nicely for hrtimers which get rearmed via the HRTIMER_RESTART mechanism, because we drop out when the callback on that CPU is running. But that fails, if a new timer gets enqueued like in the example above. This has another implication: While hrtimer_interrupt() is running we refuse remote enqueueing of timers - see hrtimer_interrupt() and hrtimer_check_target(). hrtimer_interrupt() tries to prevent this by setting cpu_base->expires to KTIME_MAX, but that fails if a new timer gets queued. Prevent both the hardware access and the remote enqueue explicitely. We can loosen the restriction on the remote enqueue now due to reevaluation of the next expiry value, but that needs a seperate patch. Folded in a fix from Vignesh Radhakrishnan. Reported-and-tested-by: Stanislav Fomichev Based-on-patch-by: Stanislav Fomichev Signed-off-by: Thomas Gleixner Cc: vigneshr@codeaurora.org Cc: john.stultz@linaro.org Cc: viresh.kumar@linaro.org Cc: fweisbec@gmail.com Cc: cl@linux.com Cc: stuart.w.hayes@gmail.com Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1501202049190.5526@nanos Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index a036d058a249..05f6df1fdf5b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -170,6 +170,7 @@ enum hrtimer_base_type { * @clock_was_set: Indicates that clock was set from irq context. * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() + * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang * @nr_events: Total number of hrtimer interrupt events @@ -185,6 +186,7 @@ struct hrtimer_cpu_base { unsigned int clock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; + int in_hrtirq; int hres_active; int hang_detected; unsigned long nr_events; -- cgit v1.2.3 From 3c5199143bc4b35f472c5c2534026d74821e2044 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 Jan 2015 08:19:32 -0500 Subject: sunrpc/lockd: fix references to the BKL The BKL is completely out of the picture in the lockd and sunrpc code these days. Update the antiquated comments that refer to it. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6f22cfeef5e3..fae6fb947fc8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -110,7 +110,7 @@ struct svc_serv { * We use sv_nrthreads as a reference count. svc_destroy() drops * this refcount, so we need to bump it up around operations that * change the number of threads. Horrible, but there it is. - * Should be called with the BKL held. + * Should be called with the "service mutex" held. */ static inline void svc_get(struct svc_serv *serv) { -- cgit v1.2.3 From c5ed1df781cb544d4e4d189bb5b6ec7336d8888c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 19 Jan 2015 08:30:30 +0100 Subject: bcma: use standard bus scanning during early register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with kernel 3.19-rc1 early registration of bcma on MIPS is done a bit later, with memory allocator available. This allows us to simplify code by using standard bus scanning method. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_soc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h index f24d245f8394..1b5fc0c3b1b5 100644 --- a/include/linux/bcma/bcma_soc.h +++ b/include/linux/bcma/bcma_soc.h @@ -5,8 +5,6 @@ struct bcma_soc { struct bcma_bus bus; - struct bcma_device core_cc; - struct bcma_device core_mips; }; int __init bcma_host_soc_register(struct bcma_soc *soc); -- cgit v1.2.3 From a9aaf2915ee265735c28b764551d084e61a694e0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 13 Jan 2015 11:34:00 +0530 Subject: cpufreq: stats: get rid of per-cpu cpufreq_stats_table All CPUs sharing a cpufreq policy share stats too. For this reason, add a stats pointer to struct cpufreq_policy and drop per-CPU variable cpufreq_stats_table used for accessing cpufreq stats so as to reduce code complexity. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4d078cebafd2..60b7b496565d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -113,6 +113,9 @@ struct cpufreq_policy { wait_queue_head_t transition_wait; struct task_struct *transition_task; /* Task which is doing the transition */ + /* cpufreq-stats */ + struct cpufreq_stats *stats; + /* For cpufreq driver's internal use */ void *driver_data; }; -- cgit v1.2.3 From 7c418ff099110d987846c8c670479a3b90ed1dcb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:08 +0530 Subject: cpufreq: Remove (now) unused 'last_cpu' from struct cpufreq_policy 'last_cpu' was used only from cpufreq-stats and isn't used anymore. Get rid of it. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 60b7b496565d..7e1a389b4e92 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -66,8 +66,6 @@ struct cpufreq_policy { unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of CPU managing this policy */ - unsigned int last_cpu; /* cpu nr of previous CPU that managed - * this policy */ struct clk *clk; struct cpufreq_cpuinfo cpuinfo;/* see above */ -- cgit v1.2.3 From d9f354460db8b58a8395936d323b4ca6e8428b9d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 6 Jan 2015 21:09:10 +0530 Subject: cpufreq: remove CPUFREQ_UPDATE_POLICY_CPU notifications CPUFREQ_UPDATE_POLICY_CPU notifications were used only from cpufreq-stats which doesn't use it anymore. Remove them. This also decrements values of other notification macros defined after CPUFREQ_UPDATE_POLICY_CPU by 1 to remove gaps. Hopefully all users are using macro's instead of direct numbers and so they wouldn't break as macro values are changed now. Reviewed-by: Prarit Bhargava Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7e1a389b4e92..2ee4888c1f47 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -368,9 +368,8 @@ static inline void cpufreq_resume(void) {} #define CPUFREQ_INCOMPATIBLE (1) #define CPUFREQ_NOTIFY (2) #define CPUFREQ_START (3) -#define CPUFREQ_UPDATE_POLICY_CPU (4) -#define CPUFREQ_CREATE_POLICY (5) -#define CPUFREQ_REMOVE_POLICY (6) +#define CPUFREQ_CREATE_POLICY (4) +#define CPUFREQ_REMOVE_POLICY (5) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); -- cgit v1.2.3 From 382548a62ade2c003c77a1055b6eb2a47ce30084 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 20 Jan 2015 11:33:09 +0100 Subject: PM / Domains: Remove pm_genpd_dev_need_restore() API There are currently no users of this API, let's remove it. Additionally, if such feature would be needed future wise, a better option is likely use pm_runtime_set_active|suspended() in some form. Signed-off-by: Ulf Hansson Acked-by: Geert Uytterhoeven Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a9edab2c787a..ed607760fc20 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -140,7 +140,6 @@ extern int __pm_genpd_name_add_device(const char *domain_name, extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); -extern void pm_genpd_dev_need_restore(struct device *dev, bool val); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_add_subdomain_names(const char *master_name, @@ -187,7 +186,6 @@ static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline void pm_genpd_dev_need_restore(struct device *dev, bool val) {} static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_sd) { -- cgit v1.2.3 From 8b618628b2bf83512fc8df5e8672619d65adfdfb Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 3 Dec 2014 14:43:06 -0500 Subject: ktime: Optimize ktime_divns for constant divisors At least on ARM, do_div() is optimized to turn constant divisors into an inline multiplication by the reciprocal value at compile time. However this optimization is missed entirely whenever ktime_divns() is used and the slow out-of-line division code is used all the time. Let ktime_divns() use do_div() inline whenever the divisor is constant and small enough. This will make things like ktime_to_us() and ktime_to_ms() much faster. Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Nicolas Pitre Acked-by: Arnd Bergmann Signed-off-by: Nicolas Pitre Signed-off-by: John Stultz --- include/linux/ktime.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index c9d645ad98ff..411dd8bfe539 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -166,7 +166,17 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) } #if BITS_PER_LONG < 64 -extern u64 ktime_divns(const ktime_t kt, s64 div); +extern u64 __ktime_divns(const ktime_t kt, s64 div); +static inline u64 ktime_divns(const ktime_t kt, s64 div) +{ + if (__builtin_constant_p(div) && !(div >> 32)) { + u64 ns = kt.tv64; + do_div(ns, div); + return ns; + } else { + return __ktime_divns(kt, div); + } +} #else /* BITS_PER_LONG < 64 */ # define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) #endif -- cgit v1.2.3 From d08c0cdd26d48751c15aa2b4479a410594fee9ac Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 8 Dec 2014 12:00:09 -0800 Subject: time: Expose getboottime64 for in-kernel uses Adds a timespec64 based getboottime64() implementation that can be used as we convert internal users of getboottime away from using timespecs. Cc: pang.xunlei Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: John Stultz --- include/linux/timekeeping.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 9b63d13ba82b..91480137aa39 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -33,6 +33,7 @@ extern time64_t ktime_get_real_seconds(void); extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); +extern void getboottime64(struct timespec64 *ts); #if BITS_PER_LONG == 64 /** @@ -72,6 +73,11 @@ static inline struct timespec get_monotonic_coarse(void) { return get_monotonic_coarse64(); } + +static inline void getboottime(struct timespec *ts) +{ + return getboottime64(ts); +} #else /** * Deprecated. Use do_settimeofday64(). @@ -129,9 +135,15 @@ static inline struct timespec get_monotonic_coarse(void) { return timespec64_to_timespec(get_monotonic_coarse64()); } -#endif -extern void getboottime(struct timespec *ts); +static inline void getboottime(struct timespec *ts) +{ + struct timespec64 ts64; + + getboottime64(&ts64); + *ts = timespec64_to_timespec(ts64); +} +#endif #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) -- cgit v1.2.3 From 2e0c78ee5ba4d777ecf22c8f40cc968b4308ca88 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 18 Dec 2014 18:04:34 -0800 Subject: time: Expose get_monotonic_boottime64 for in-kernel use As part of the 2038 conversion process, add a get_monotonic_boottime64 accessor so we can depracate get_monotonic_boottime. Cc: pang.xunlei Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: John Stultz --- include/linux/timekeeping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 91480137aa39..3eaae4754275 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -229,6 +229,11 @@ static inline void get_monotonic_boottime(struct timespec *ts) *ts = ktime_to_timespec(ktime_get_boottime()); } +static inline void get_monotonic_boottime64(struct timespec64 *ts) +{ + *ts = ktime_to_timespec64(ktime_get_boottime()); +} + static inline void timekeeping_clocktai(struct timespec *ts) { *ts = ktime_to_timespec(ktime_get_clocktai()); -- cgit v1.2.3 From 9a4a445e30f0b601ca2d9433274047cbf48ebf9e Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 22 Jan 2015 02:31:55 +0000 Subject: rtc: Convert rtc_set_ntp_time() to use timespec64 rtc_set_ntp_time() uses timespec which is y2038-unsafe, so modify to use timespec64 which is y2038-safe, then replace rtc_time_to_tm() with rtc_time64_to_tm(). Also adjust all its call sites(only NTP uses it) accordingly. Cc: pang.xunlei Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Xunlei Pang Signed-off-by: John Stultz --- include/linux/rtc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 6d6be09a2fe5..dcad7ee0d746 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -161,7 +161,7 @@ extern void devm_rtc_device_unregister(struct device *dev, extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); -extern int rtc_set_ntp_time(struct timespec now); +extern int rtc_set_ntp_time(struct timespec64 now); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); -- cgit v1.2.3 From 872bf2fb69d90e3619befee842fc26db39d8e475 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:35 +0200 Subject: net/mlx4_core: Maintain a persistent memory for mlx4 device Maintain a persistent memory that should survive reset flow/PCI error. This comes as a preparation for coming series to support above flows. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f1e41b33462f..1069ce65e8b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -744,8 +744,15 @@ struct mlx4_vf_dev { u8 n_ports; }; -struct mlx4_dev { +struct mlx4_dev_persistent { struct pci_dev *pdev; + struct mlx4_dev *dev; + int nvfs[MLX4_MAX_PORTS + 1]; + int num_vfs; +}; + +struct mlx4_dev { + struct mlx4_dev_persistent *persist; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; @@ -754,13 +761,11 @@ struct mlx4_dev { struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; - int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; struct mlx4_vf_dev *dev_vfs; - int nvfs[MLX4_MAX_PORTS + 1]; }; struct mlx4_eqe { -- cgit v1.2.3 From dd0eefe3abbf47442db296bf68f27eb2860c1cdf Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:36 +0200 Subject: net/mlx4_core: Set device configuration data to be persistent across reset When an HCA enters an internal error state, this is detected by the driver. The driver then should reset the HCA and restart the software stack. Keep ports information and some SRIOV configuration in a persistent area to have it valid across reset. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1069ce65e8b4..8c3837ac1a2d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -749,6 +749,8 @@ struct mlx4_dev_persistent { struct mlx4_dev *dev; int nvfs[MLX4_MAX_PORTS + 1]; int num_vfs; + enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; + enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; }; struct mlx4_dev { -- cgit v1.2.3 From ad9a0bf08ffbf32b8f292c3bb78ca0f24bb8f6b2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:37 +0200 Subject: net/mlx4_core: Refactor the catas flow to work per device Using a WQ per device instead of a single global WQ, this allows independent reset handling per device even when SRIOV is used. This comes as a pre-patch for supporting chip reset for both native and SRIOV. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8c3837ac1a2d..da425d2f3708 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -751,6 +751,8 @@ struct mlx4_dev_persistent { int num_vfs; enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; + struct work_struct catas_work; + struct workqueue_struct *catas_wq; }; struct mlx4_dev { -- cgit v1.2.3 From f6bc11e42646e661e699a5593cbd1e9dba7191d0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:38 +0200 Subject: net/mlx4_core: Enhance the catas flow to support device reset This includes: - resetting the chip when a fatal error is detected (the current code does not do this). - exposing the ability to enter error state from outside the catas code by calling its functionality. (E.g. FW Command timeout, AER error). - managing a persistent device state. This is needed to sync between reset flow cases. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index da425d2f3708..7d5d317cb7a6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -411,6 +411,11 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +enum { + MLX4_DEVICE_STATE_UP = 1 << 0, + MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -753,6 +758,8 @@ struct mlx4_dev_persistent { enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; struct work_struct catas_work; struct workqueue_struct *catas_wq; + struct mutex device_state_mutex; /* protect HW state */ + u8 state; }; struct mlx4_dev { -- cgit v1.2.3 From f5aef5aa35063f2b45c3605871cd525d0cb7fb7a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:39 +0200 Subject: net/mlx4_core: Activate reset flow upon fatal command cases We activate reset flow upon command fatal errors, when the device enters an erroneous state, and must be reset. The cases below are assumed to be fatal: FW command timed-out, an error from FW on closing commands, pci is offline when posting/pending a command. In those cases we place the device into an error state: chip is reset, pending commands are awakened and completed immediately. Subsequent commands will return immediately. The return code in the above cases will depend on the command. Commands which free and close resources will return success (because the chip was reset, so callers may safely free their kernel resources). Other commands will return -EIO. Since the device's state was marked as error, the catas poller will detect this and restart the device's software stack (as is done when a FW internal error is directly detected). The device state is protected by a persistent mutex lives on its mlx4_dev, as such no need any more for the hcr_mutex which is removed. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 64d25941b329..e7543844cc7a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -279,6 +279,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); +void mlx4_cmd_wake_completions(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) -- cgit v1.2.3 From c69453e294c9f16da977b68e658a8028b854c209 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:40 +0200 Subject: net/mlx4_core: Manage interface state for Reset flow cases We need to manage interface state to sync between reset flow and some other relative cases such as remove_one. This has to be done to prevent certain races. For example in case software stack is down as a result of unload call, the remove_one should skip the unload phase. Implement the remove_one case, handling AER and other cases comes next. The interface can be up/down, upon remove_one, the state will include an extra bit indicating that the device is cleaned-up, forcing other tasks to finish before the final cleanup. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7d5d317cb7a6..33f9ca71925c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -416,6 +416,11 @@ enum { MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, }; +enum { + MLX4_INTERFACE_STATE_UP = 1 << 0, + MLX4_INTERFACE_STATE_DELETION = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -760,6 +765,8 @@ struct mlx4_dev_persistent { struct workqueue_struct *catas_wq; struct mutex device_state_mutex; /* protect HW state */ u8 state; + struct mutex interface_state_mutex; /* protect SW state */ + u8 interface_state; }; struct mlx4_dev { -- cgit v1.2.3 From 55ad359225b2232b9b8f04a0dfa169bd3a7d86d2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:42 +0200 Subject: net/mlx4_core: Enable device recovery flow with SRIOV In SRIOV, both the PF and the VF may attempt device recovery whenever they assume that the device is not functioning. When the PF driver resets the device, the VF should detect this and attempt to reinitialize itself. The VF must be able to reset itself under all circumstances, even if the PF is not responsive. The VF shall reset itself in the following cases: 1. Commands are not processed within reasonable time over the communication channel. This is done considering device state and the correct return code based on the command as was done in the native mode, done in the next patch. 2. The VF driver receives an internal error event reported by the PF on the communication channel. This occurs when the PF driver resets the device or when VF is out of sync with the PF. Add 'VF reset' capability, which allows the VF to reinitialize itself even when the PF is not responsive. As PF and VF may run their reset flow simulantanisly, there are several cases that are handled: - Prevent freeing VF resources upon FLR, when PF is in its unloading stage. - Prevent PF getting VF commands before it has finished initializing its resources. - Upon VF startup, check that comm-channel is online before sending commands to the PF and getting timed-out. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 2 ++ include/linux/mlx4/device.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e7543844cc7a..c989442ffc6a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -280,6 +280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); void mlx4_cmd_wake_completions(struct mlx4_dev *dev); +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) @@ -289,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) +#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17) #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 33f9ca71925c..5ef54e145e4d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,10 @@ enum { MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1 }; +enum { + MLX4_VF_CAP_FLAG_RESET = 1 << 0 +}; + /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. * Currently, this only includes QPs used by the ETH interface, @@ -545,6 +549,7 @@ struct mlx4_caps { u8 alloc_res_qp_mask; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + u32 vf_caps; }; struct mlx4_buf_list { -- cgit v1.2.3 From c2943f14534bdc4230f4da6dcd4ea03c5d8c8162 Mon Sep 17 00:00:00 2001 From: Harout Hedeshian Date: Tue, 20 Jan 2015 10:06:05 -0700 Subject: net: ipv6: Add sysctl entry to disable MTU updates from RA The kernel forcefully applies MTU values received in router advertisements provided the new MTU is less than the current. This behavior is undesirable when the user space is managing the MTU. Instead a sysctl flag 'accept_ra_mtu' is introduced such that the user space can control whether or not RA provided MTU updates should be applied. The default behavior is unchanged; user space must explicitly set this flag to 0 for RA MTUs to be ignored. Signed-off-by: Harout Hedeshian Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c694e7baa621..2805062c013f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -52,6 +52,7 @@ struct ipv6_devconf { __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; + __s32 accept_ra_mtu; void *sysctl; }; -- cgit v1.2.3 From 607954b084d4ad5e6a2e0f795de7803d9c6ae37f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 21 Jan 2015 11:12:13 +0000 Subject: rhashtable: fix rht_for_each_entry_safe() endless loop "next" is not updated, causing an endless loop for buckets with more than one element. Fixes: 88d6ed15acff ("rhashtable: Convert bucket iterators to take table and index") Signed-off-by: Patrick McHardy Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a2562ed53ea3..e0337844358e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -260,7 +260,9 @@ void rhashtable_destroy(struct rhashtable *ht); next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next) + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** * rht_for_each_rcu_continue - continue iterating over rcu hash chain -- cgit v1.2.3 From d5fd120e7860c2b3d4c936a2ebadb6b244bec4c8 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 26 Jan 2015 20:59:31 +0100 Subject: i2c: Only include slave support if selected Make the slave support depend on CONFIG_I2C_SLAVE. Otherwise it gets included unconditionally, even when it is not needed. I2C bus drivers which implement slave support must select I2C_SLAVE. Signed-off-by: Jean Delvare Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index e3a1721c8354..7c7695940ddd 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -228,7 +228,9 @@ struct i2c_client { struct device dev; /* the device structure */ int irq; /* irq issued by device */ struct list_head detected; +#if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ +#endif }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) @@ -253,6 +255,7 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) /* I2C slave support */ +#if IS_ENABLED(CONFIG_I2C_SLAVE) enum i2c_slave_event { I2C_SLAVE_REQ_READ_START, I2C_SLAVE_REQ_READ_END, @@ -269,6 +272,7 @@ static inline int i2c_slave_event(struct i2c_client *client, { return client->slave_cb(client, event, val); } +#endif /** * struct i2c_board_info - template for device creation @@ -404,8 +408,10 @@ struct i2c_algorithm { /* To determine what the adapter supports */ u32 (*functionality) (struct i2c_adapter *); +#if IS_ENABLED(CONFIG_I2C_SLAVE) int (*reg_slave)(struct i2c_client *client); int (*unreg_slave)(struct i2c_client *client); +#endif }; /** -- cgit v1.2.3 From 9879de7373fcfb466ec198293b6ccc1ad7a42dd8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 26 Jan 2015 12:58:32 -0800 Subject: mm: page_alloc: embed OOM killing naturally into allocation slowpath The OOM killing invocation does a lot of duplicative checks against the task's allocation context. Rework it to take advantage of the existing checks in the allocator slowpath. The OOM killer is invoked when the allocator is unable to reclaim any pages but the allocation has to keep looping. Instead of having a check for __GFP_NORETRY hidden in oom_gfp_allowed(), just move the OOM invocation to the true branch of should_alloc_retry(). The __GFP_FS check from oom_gfp_allowed() can then be moved into the OOM avoidance branch in __alloc_pages_may_oom(), along with the PF_DUMPCORE test. __alloc_pages_may_oom() can then signal to the caller whether the OOM killer was invoked, instead of requiring it to duplicate the order and high_zoneidx checks to guess this when deciding whether to continue. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 853698c721f7..76200984d1e2 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -85,11 +85,6 @@ static inline void oom_killer_enable(void) oom_killer_disabled = false; } -static inline bool oom_gfp_allowed(gfp_t gfp_mask) -{ - return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY); -} - extern struct task_struct *find_lock_task_mm(struct task_struct *p); static inline bool task_will_free_mem(struct task_struct *task) -- cgit v1.2.3 From 07261edb971492c6b41b44d7b1b51f76807d30ad Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 26 Jan 2015 12:58:43 -0800 Subject: printk: add dummy routine for when CONFIG_PRINTK=n There are missing dummy routines for log_buf_addr_get() and log_buf_len_get() for when CONFIG_PRINTK is not set causing build failures. This patch adds these dummy routines at the appropriate location. Signed-off-by: Pranith Kumar Cc: Michael Ellerman Reviewed-by: Petr Mladek Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index c8f170324e64..4d5bf5726578 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -10,9 +10,6 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; -extern char *log_buf_addr_get(void); -extern u32 log_buf_len_get(void); - static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { @@ -163,6 +160,8 @@ extern int kptr_restrict; extern void wake_up_klogd(void); +char *log_buf_addr_get(void); +u32 log_buf_len_get(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); void dump_stack_set_arch_desc(const char *fmt, ...); @@ -198,6 +197,16 @@ static inline void wake_up_klogd(void) { } +static inline char *log_buf_addr_get(void) +{ + return NULL; +} + +static inline u32 log_buf_len_get(void) +{ + return 0; +} + static inline void log_buf_kexec_setup(void) { } -- cgit v1.2.3 From 0b35fa7daefe9da6fdef91d95e07eebb714a8fcc Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Jan 2015 23:33:30 +0100 Subject: NFC: st21nfcb: Remove useless include include/linux/platform_data/st21nfcb.h is phy generic. There is no need to include linux/i2c.h Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfcb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h index c3b432f5b63e..05c54c958cc3 100644 --- a/include/linux/platform_data/st21nfcb.h +++ b/include/linux/platform_data/st21nfcb.h @@ -19,8 +19,6 @@ #ifndef _ST21NFCB_NCI_H_ #define _ST21NFCB_NCI_H_ -#include - #define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci" struct st21nfcb_nfc_platform_data { -- cgit v1.2.3 From 6da7c85c75eed769423b428eb654eaaf89d273c1 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Jan 2015 23:33:31 +0100 Subject: NFC: st21nfcb: Fix copy/paste error in comment include/linux/platform_data/st21nfcb.h is based on include/linux/platform_data/st21nfca.h. The endif comment is inacurrate for st21nfcb. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfcb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h index 05c54c958cc3..b023373d9874 100644 --- a/include/linux/platform_data/st21nfcb.h +++ b/include/linux/platform_data/st21nfcb.h @@ -26,4 +26,4 @@ struct st21nfcb_nfc_platform_data { unsigned int irq_polarity; }; -#endif /* _ST21NFCA_HCI_H_ */ +#endif /* _ST21NFCB_NCI_H_ */ -- cgit v1.2.3 From aae88261abd58fffef7ee0e00160ce4ea105b0f3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 26 Jan 2015 22:05:38 -0800 Subject: net: phy: document has_fixups field has_fixups was introduced to help keeping track of fixups/quirks running on a PHY device, but we did not update the comment above struct phy_device accordingly. Fixes: b0ae009f3dc14 (net: phy: add "has_fixups" boolean property") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9c189a1fa3a2..1b3690b597d5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -327,6 +327,7 @@ struct phy_c45_device_ids { * c45_ids: 802.3-c45 Device Identifers if is_c45. * is_c45: Set to true if this phy uses clause 45 addressing. * is_internal: Set to true if this phy is internal to a MAC. + * has_fixups: Set to true if this phy has fixups/quirks. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * addr: Bus address of PHY -- cgit v1.2.3 From 8a477a6fb6a33651adda772360b85fd813569743 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 26 Jan 2015 22:05:39 -0800 Subject: net: phy: keep track of the PHY suspend state In order to avoid double calls to phydev->drv->suspend and resume, keep track of whether the PHY has already been suspended as a consequence of a successful call to phy_suspend(). We will use this in our MDIO bus suspend/resume hooks to avoid a double suspend call. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1b3690b597d5..685809835b5c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -328,6 +328,7 @@ struct phy_c45_device_ids { * is_c45: Set to true if this phy uses clause 45 addressing. * is_internal: Set to true if this phy is internal to a MAC. * has_fixups: Set to true if this phy has fixups/quirks. + * suspended: Set to true if this phy has been suspended successfully. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * addr: Bus address of PHY @@ -365,6 +366,7 @@ struct phy_device { bool is_c45; bool is_internal; bool has_fixups; + bool suspended; enum phy_state state; -- cgit v1.2.3 From e9fb8b7e4f6af17e3aa4835281e06fdc920341f9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:48:36 +0000 Subject: compat: Declare compat_sys_sigpending and compat_sys_sigprocmask prototypes __ARCH_WANT_SYS_SIGPENDING or __ARCH_WANT_SYS_SIGPROGMASK may be defined for compat support but the corresponding prototypes are missing from linux/compat.h. Signed-off-by: Catalin Marinas Acked-by: Andrew Morton Cc: Arnd Bergmann --- include/linux/compat.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 7450ca2ac1fc..ab25814690bc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -689,6 +689,15 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr); +#ifdef __ARCH_WANT_SYS_SIGPENDING +asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set); +#endif + +#ifdef __ARCH_WANT_SYS_SIGPROCMASK +asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *nset, + compat_old_sigset_t __user *oset); +#endif + int compat_restore_altstack(const compat_stack_t __user *uss); int __compat_save_altstack(compat_stack_t __user *, unsigned long); #define compat_save_altstack_ex(uss, sp) do { \ -- cgit v1.2.3 From 54e45c169dbce43cf46d00eb1521b655b6e4f9e9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:52:38 +0000 Subject: syscalls: Declare sys_*stat64 prototypes if __ARCH_WANT_(COMPAT_)STAT64 Currently, the sys_stat64, sys_fstat64 and sys_lstat64 prototpyes are only declared if BITS_PER_LONG == 32. Following commit 0753f70f07fb (fs: Build sys_stat64() and friends if __ARCH_WANT_COMPAT_STAT64), the implementation of these functions is allowed on 64-bit systems for compat support. The patch changes the condition on the prototype declaration from BITS_PER_LONG == 32 to defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64). In addition, it moves the sys_fstatat64 prototype under the same #if block Signed-off-by: Catalin Marinas Acked-by: Andrew Morton Cc: Arnd Bergmann --- include/linux/syscalls.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 85893d744901..76d1e38aabe1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -410,12 +410,16 @@ asmlinkage long sys_newlstat(const char __user *filename, struct stat __user *statbuf); asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); -#if BITS_PER_LONG == 32 +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) asmlinkage long sys_stat64(const char __user *filename, struct stat64 __user *statbuf); asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); asmlinkage long sys_lstat64(const char __user *filename, struct stat64 __user *statbuf); +asmlinkage long sys_fstatat64(int dfd, const char __user *filename, + struct stat64 __user *statbuf, int flag); +#endif +#if BITS_PER_LONG == 32 asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); #endif @@ -771,8 +775,6 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); asmlinkage long sys_newfstatat(int dfd, const char __user *filename, struct stat __user *statbuf, int flag); -asmlinkage long sys_fstatat64(int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag); asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, int bufsiz); asmlinkage long sys_utimensat(int dfd, const char __user *filename, -- cgit v1.2.3 From cfcf1682c4ca8f601a4702255958e0b1c9aa12cc Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:05 +0200 Subject: cfg80211: Add new GCMP, CCMP-256, BIP-GMAC, BIP-CMAC-256 ciphers This makes cfg80211 aware of the GCMP, GCMP-256, CCMP-256, BIP-GMAC-128, BIP-GMAC-256, and BIP-CMAC-256 cipher suites. These new cipher suites were defined in IEEE Std 802.11ac-2013. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4f4eea8a6288..dbf417bf25bf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1994,9 +1994,15 @@ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, WLAN_KEY_LEN_WEP104 = 13, WLAN_KEY_LEN_CCMP = 16, + WLAN_KEY_LEN_CCMP_256 = 32, WLAN_KEY_LEN_TKIP = 32, WLAN_KEY_LEN_AES_CMAC = 16, WLAN_KEY_LEN_SMS4 = 32, + WLAN_KEY_LEN_GCMP = 16, + WLAN_KEY_LEN_GCMP_256 = 32, + WLAN_KEY_LEN_BIP_CMAC_256 = 32, + WLAN_KEY_LEN_BIP_GMAC_128 = 16, + WLAN_KEY_LEN_BIP_GMAC_256 = 32, }; #define IEEE80211_WEP_IV_LEN 4 @@ -2004,9 +2010,16 @@ enum ieee80211_key_len { #define IEEE80211_CCMP_HDR_LEN 8 #define IEEE80211_CCMP_MIC_LEN 8 #define IEEE80211_CCMP_PN_LEN 6 +#define IEEE80211_CCMP_256_HDR_LEN 8 +#define IEEE80211_CCMP_256_MIC_LEN 16 +#define IEEE80211_CCMP_256_PN_LEN 6 #define IEEE80211_TKIP_IV_LEN 8 #define IEEE80211_TKIP_ICV_LEN 4 #define IEEE80211_CMAC_PN_LEN 6 +#define IEEE80211_GMAC_PN_LEN 6 +#define IEEE80211_GCMP_HDR_LEN 8 +#define IEEE80211_GCMP_MIC_LEN 16 +#define IEEE80211_GCMP_PN_LEN 6 /* Public action codes */ enum ieee80211_pub_actioncode { @@ -2230,6 +2243,11 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_WEP104 0x000FAC05 #define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06 #define WLAN_CIPHER_SUITE_GCMP 0x000FAC08 +#define WLAN_CIPHER_SUITE_GCMP_256 0x000FAC09 +#define WLAN_CIPHER_SUITE_CCMP_256 0x000FAC0A +#define WLAN_CIPHER_SUITE_BIP_GMAC_128 0x000FAC0B +#define WLAN_CIPHER_SUITE_BIP_GMAC_256 0x000FAC0C +#define WLAN_CIPHER_SUITE_BIP_CMAC_256 0x000FAC0D #define WLAN_CIPHER_SUITE_SMS4 0x00147201 -- cgit v1.2.3 From 56c52da2d554f081e8fce58ecbcf6a40c605b95b Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:08 +0200 Subject: mac80111: Add BIP-CMAC-256 cipher This allows mac80211 to configure BIP-CMAC-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dbf417bf25bf..b9c7897dc566 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1017,6 +1017,15 @@ struct ieee80211_mmie { u8 mic[8]; } __packed; +/* Management MIC information element (IEEE 802.11w) for GMAC and CMAC-256 */ +struct ieee80211_mmie_16 { + u8 element_id; + u8 length; + __le16 key_id; + u8 sequence_number[6]; + u8 mic[16]; +} __packed; + struct ieee80211_vendor_ie { u8 element_id; u8 len; -- cgit v1.2.3 From 05c80d75f10ad7d3f95444b65788d6a0bbb4380d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 19 Dec 2014 09:14:20 -0300 Subject: [media] hdmi: add new HDMI 2.0 defines Add new Video InfoFrame colorspace information introduced in HDMI 2.0 and new Audio Coding Extension Types, also from HDMI 2.0. HDMI_CONTENT_TYPE_NONE was renamed to _GRAPHICS since that's what it is called in CEA-861-F. Signed-off-by: Hans Verkuil Reviewed-by: Thierry Reding Acked-by: Thierry Reding Signed-off-by: Mauro Carvalho Chehab --- include/linux/hdmi.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index cbb5790a35cd..5afc0bff2bbe 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -52,12 +52,18 @@ enum hdmi_colorspace { HDMI_COLORSPACE_RGB, HDMI_COLORSPACE_YUV422, HDMI_COLORSPACE_YUV444, + HDMI_COLORSPACE_YUV420, + HDMI_COLORSPACE_RESERVED4, + HDMI_COLORSPACE_RESERVED5, + HDMI_COLORSPACE_RESERVED6, + HDMI_COLORSPACE_IDO_DEFINED, }; enum hdmi_scan_mode { HDMI_SCAN_MODE_NONE, HDMI_SCAN_MODE_OVERSCAN, HDMI_SCAN_MODE_UNDERSCAN, + HDMI_SCAN_MODE_RESERVED, }; enum hdmi_colorimetry { @@ -71,6 +77,7 @@ enum hdmi_picture_aspect { HDMI_PICTURE_ASPECT_NONE, HDMI_PICTURE_ASPECT_4_3, HDMI_PICTURE_ASPECT_16_9, + HDMI_PICTURE_ASPECT_RESERVED, }; enum hdmi_active_aspect { @@ -92,12 +99,18 @@ enum hdmi_extended_colorimetry { HDMI_EXTENDED_COLORIMETRY_S_YCC_601, HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601, HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB, + + /* The following EC values are only defined in CEA-861-F. */ + HDMI_EXTENDED_COLORIMETRY_BT2020_CONST_LUM, + HDMI_EXTENDED_COLORIMETRY_BT2020, + HDMI_EXTENDED_COLORIMETRY_RESERVED, }; enum hdmi_quantization_range { HDMI_QUANTIZATION_RANGE_DEFAULT, HDMI_QUANTIZATION_RANGE_LIMITED, HDMI_QUANTIZATION_RANGE_FULL, + HDMI_QUANTIZATION_RANGE_RESERVED, }; /* non-uniform picture scaling */ @@ -114,7 +127,7 @@ enum hdmi_ycc_quantization_range { }; enum hdmi_content_type { - HDMI_CONTENT_TYPE_NONE, + HDMI_CONTENT_TYPE_GRAPHICS, HDMI_CONTENT_TYPE_PHOTO, HDMI_CONTENT_TYPE_CINEMA, HDMI_CONTENT_TYPE_GAME, @@ -194,6 +207,7 @@ enum hdmi_audio_coding_type { HDMI_AUDIO_CODING_TYPE_MLP, HDMI_AUDIO_CODING_TYPE_DST, HDMI_AUDIO_CODING_TYPE_WMA_PRO, + HDMI_AUDIO_CODING_TYPE_CXT, }; enum hdmi_audio_sample_size { @@ -216,9 +230,23 @@ enum hdmi_audio_sample_frequency { enum hdmi_audio_coding_type_ext { HDMI_AUDIO_CODING_TYPE_EXT_STREAM, + + /* + * The next three CXT values are defined in CEA-861-E only. + * They do not exist in older versions, and in CEA-861-F they are + * defined as 'Not in use'. + */ HDMI_AUDIO_CODING_TYPE_EXT_HE_AAC, HDMI_AUDIO_CODING_TYPE_EXT_HE_AAC_V2, HDMI_AUDIO_CODING_TYPE_EXT_MPEG_SURROUND, + + /* The following CXT values are only defined in CEA-861-F. */ + HDMI_AUDIO_CODING_TYPE_EXT_MPEG4_HE_AAC, + HDMI_AUDIO_CODING_TYPE_EXT_MPEG4_HE_AAC_V2, + HDMI_AUDIO_CODING_TYPE_EXT_MPEG4_AAC_LC, + HDMI_AUDIO_CODING_TYPE_EXT_DRA, + HDMI_AUDIO_CODING_TYPE_EXT_MPEG4_HE_AAC_SURROUND, + HDMI_AUDIO_CODING_TYPE_EXT_MPEG4_AAC_LC_SURROUND = 10, }; struct hdmi_audio_infoframe { -- cgit v1.2.3 From 2c676f378edb16cb68f7815581c8119fc43a4b85 Mon Sep 17 00:00:00 2001 From: Martin Bugge Date: Fri, 19 Dec 2014 09:14:21 -0300 Subject: [media] hdmi: added unpack and logging functions for InfoFrames When receiving video it is very useful to be able to unpack the InfoFrames. Logging is useful as well, both for transmitters and receivers. Especially when implementing the VIDIOC_LOG_STATUS ioctl (supported by many V4L2 drivers) for a receiver it is important to be able to easily log what the InfoFrame contains. This greatly simplifies debugging. Signed-off-by: Martin Bugge Signed-off-by: Hans Verkuil Acked-by: Thierry Reding Signed-off-by: Mauro Carvalho Chehab --- include/linux/hdmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 5afc0bff2bbe..2ff34315a1bb 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -25,6 +25,7 @@ #define __LINUX_HDMI_H_ #include +#include enum hdmi_infoframe_type { HDMI_INFOFRAME_TYPE_VENDOR = 0x81, @@ -327,5 +328,8 @@ union hdmi_infoframe { ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size); +int hdmi_infoframe_unpack(union hdmi_infoframe *frame, void *buffer); +void hdmi_infoframe_log(const char *level, struct device *dev, + union hdmi_infoframe *frame); #endif /* _DRM_HDMI_H */ -- cgit v1.2.3 From dc189053e1a5ae606c56e432dae1afc28261a819 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 19 Dec 2014 09:14:22 -0300 Subject: [media] hdmi: rename HDMI_AUDIO_CODING_TYPE_EXT_STREAM to _EXT_CT As per the suggestion of Thierry Reding rename HDMI_AUDIO_CODING_TYPE_EXT_STREAM to HDMI_AUDIO_CODING_TYPE_EXT_CT to be consistent with the CEA-861 spec. Signed-off-by: Hans Verkuil Acked-by: Thierry Reding Signed-off-by: Mauro Carvalho Chehab --- include/linux/hdmi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 2ff34315a1bb..e9744202fa29 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -230,7 +230,8 @@ enum hdmi_audio_sample_frequency { }; enum hdmi_audio_coding_type_ext { - HDMI_AUDIO_CODING_TYPE_EXT_STREAM, + /* Refer to Audio Coding Type (CT) field in Data Byte 1 */ + HDMI_AUDIO_CODING_TYPE_EXT_CT, /* * The next three CXT values are defined in CEA-861-E only. -- cgit v1.2.3 From f27b37f5993a080700ccecdce9960d1563eccd36 Mon Sep 17 00:00:00 2001 From: Bintian Wang Date: Tue, 27 Jan 2015 20:50:29 +0800 Subject: regmap: correct the description of structure element in reg_field Fix incorrect description of structure element "msb", which is described as "reg". Signed-off-by: Bintian Wang Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4419b99d8d6e..116655d92269 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -468,7 +468,7 @@ bool regmap_reg_in_ranges(unsigned int reg, * * @reg: Offset of the register within the regmap bank * @lsb: lsb of the register field. - * @reg: msb of the register field. + * @msb: msb of the register field. * @id_size: port size if it has some ports * @id_offset: address offset for each ports */ -- cgit v1.2.3 From 2130fb97fecf9a51bb4a21da220cff3f72496a94 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:19 +0100 Subject: NFC: st21nfca: Adding support for secure element st21nfca has 1 physical SWP line and can support up to 2 secure elements (UICC & eSE) thanks to an external switch managed with a gpio. The platform integrator needs to specify thanks to 2 initialization properties, uicc-present and ese-present, if it is suppose to have uicc and/or ese. Of course if the platform does not have an external switch, only one kind of secure element can be supported. Those parameters are under platform integrator responsibilities. During initialization, the white_list will be set according to those parameters. The discovery_se function will assume a secure element is physically present according to uicc-present and ese-present values and will add it to the secure element list. On ese activation, the atr is retrieved to calculate a command exchange timeout based on the first atr(TB) value. The se_io will allow to transfer data over SWP. 2 kind of events may appear after a data is sent over: - ST21NFCA_EVT_TRANSMIT_DATA when receiving an apdu answer - ST21NFCA_EVT_WTX_REQUEST when the secure element needs more time than expected to compute a command. If this timeout expired, a first recovery tentative consist to send a simple software reset proprietary command. If this tentative still fail, a second recovery tentative consist to send a hardware reset proprietary command. This function is only relevant for eSE like secure element. This patch also change the way a pipe is referenced. There can be different pipe connected to the same gate with different host destination (ex: CONNECTIVITY). In order to keep host information every pipe are reference with a tuple (gate, host). In order to reduce changes, we are keeping unchanged the way a gate is addressed on the Terminal Host. However, this is working because we consider the apdu reader gate is only present on the eSE slot also the connectivity gate cannot give a reliable value; it will give the latest stored pipe value. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfca.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h index 5087fff96d86..cc2bdafb0c69 100644 --- a/include/linux/platform_data/st21nfca.h +++ b/include/linux/platform_data/st21nfca.h @@ -26,6 +26,8 @@ struct st21nfca_nfc_platform_data { unsigned int gpio_ena; unsigned int irq_polarity; + bool is_ese_present; + bool is_uicc_present; }; #endif /* _ST21NFCA_HCI_H_ */ -- cgit v1.2.3 From be6a6b43b597a37d96dbf74985f72045ccef0940 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Jan 2015 15:57:59 +0200 Subject: net/mlx4_core: Add bad-cable event support If the firmware can detect a bad cable, allow it to generate an event, and print the problem in the log. Signed-off-by: Jack Morgenstein Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 5ef54e145e4d..c95d659a39f2 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -200,7 +200,8 @@ enum { MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16, MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, - MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19 + MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, + MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20 }; enum { @@ -280,6 +281,7 @@ enum mlx4_event { MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d, + MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT = 0x3e, MLX4_EVENT_TYPE_NONE = 0xff, }; @@ -288,6 +290,11 @@ enum { MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 }; +enum { + MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE = 1, + MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE = 2, +}; + enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; @@ -860,6 +867,11 @@ struct mlx4_eqe { } __packed tbl_change_info; } params; } __packed port_mgmt_change; + struct { + u8 reserved[3]; + u8 port; + u32 reserved1[5]; + } __packed bad_cable; } event; u8 slave_id; u8 reserved3[2]; -- cgit v1.2.3 From 5a03108689c6f3e448a920b42af04e6d28401f80 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Jan 2015 15:58:02 +0200 Subject: net/mlx4_core: Adjust command timeouts to conform to the firmware spec The firmware spec states that the timeout for all commands should be 60 seconds. In the past, the spec indicated that there were several classes of timeout (short, medium, and long). The driver has these different timeout classes. We leave the class differentiation in the driver as-is (to protect against any future spec changes), but set the timeout for all classes to be 60 seconds. In addition, we fix a few commands which had hard-coded numeric timeouts specified. Signed-off-by: Jack Morgenstein Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index c989442ffc6a..ae95adc78509 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -165,9 +165,9 @@ enum { }; enum { - MLX4_CMD_TIME_CLASS_A = 10000, - MLX4_CMD_TIME_CLASS_B = 10000, - MLX4_CMD_TIME_CLASS_C = 10000, + MLX4_CMD_TIME_CLASS_A = 60000, + MLX4_CMD_TIME_CLASS_B = 60000, + MLX4_CMD_TIME_CLASS_C = 60000, }; enum { -- cgit v1.2.3 From 14bf61ffe6ac54afcd1e888a4407fe16054483db Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 9 Oct 2014 16:03:13 +0200 Subject: quota: Switch ->get_dqblk() and ->set_dqblk() to use bytes as space units Currently ->get_dqblk() and ->set_dqblk() use struct fs_disk_quota which tracks space limits and usage in 512-byte blocks. However VFS quotas track usage in bytes (as some filesystems require that) and we need to somehow pass this information. Upto now it wasn't a problem because we didn't do any unit conversion (thus VFS quota routines happily stuck number of bytes into d_bcount field of struct fd_disk_quota). Only if you tried to use Q_XGETQUOTA or Q_XSETQLIM for VFS quotas (or Q_GETQUOTA / Q_SETQUOTA for XFS quotas), you got bogus results. Hardly anyone tried this but reportedly some Samba users hit the problem in practice. So when we want interfaces compatible we need to fix this. We bite the bullet and define another quota structure used for passing information from/to ->get_dqblk()/->set_dqblk. It's somewhat sad we have to have more conversion routines in fs/quota/quota.c and another copying of quota structure slows down getting of quota information by about 2% but it seems cleaner than overloading e.g. units of d_bcount to bytes. CC: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 47 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/quotaops.h | 4 ++-- 2 files changed, 47 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 50978b781a19..097d7eb2441e 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -321,6 +321,49 @@ struct dquot_operations { struct path; +/* Structure for communicating via ->get_dqblk() & ->set_dqblk() */ +struct qc_dqblk { + int d_fieldmask; /* mask of fields to change in ->set_dqblk() */ + u64 d_spc_hardlimit; /* absolute limit on used space */ + u64 d_spc_softlimit; /* preferred limit on used space */ + u64 d_ino_hardlimit; /* maximum # allocated inodes */ + u64 d_ino_softlimit; /* preferred inode limit */ + u64 d_space; /* Space owned by the user */ + u64 d_ino_count; /* # inodes owned by the user */ + s64 d_ino_timer; /* zero if within inode limits */ + /* if not, we refuse service */ + s64 d_spc_timer; /* similar to above; for space */ + int d_ino_warns; /* # warnings issued wrt num inodes */ + int d_spc_warns; /* # warnings issued wrt used space */ + u64 d_rt_spc_hardlimit; /* absolute limit on realtime space */ + u64 d_rt_spc_softlimit; /* preferred limit on RT space */ + u64 d_rt_space; /* realtime space owned */ + s64 d_rt_spc_timer; /* similar to above; for RT space */ + int d_rt_spc_warns; /* # warnings issued wrt RT space */ +}; + +/* Field specifiers for ->set_dqblk() in struct qc_dqblk */ +#define QC_INO_SOFT (1<<0) +#define QC_INO_HARD (1<<1) +#define QC_SPC_SOFT (1<<2) +#define QC_SPC_HARD (1<<3) +#define QC_RT_SPC_SOFT (1<<4) +#define QC_RT_SPC_HARD (1<<5) +#define QC_LIMIT_MASK (QC_INO_SOFT | QC_INO_HARD | QC_SPC_SOFT | QC_SPC_HARD | \ + QC_RT_SPC_SOFT | QC_RT_SPC_HARD) +#define QC_SPC_TIMER (1<<6) +#define QC_INO_TIMER (1<<7) +#define QC_RT_SPC_TIMER (1<<8) +#define QC_TIMER_MASK (QC_SPC_TIMER | QC_INO_TIMER | QC_RT_SPC_TIMER) +#define QC_SPC_WARNS (1<<9) +#define QC_INO_WARNS (1<<10) +#define QC_RT_SPC_WARNS (1<<11) +#define QC_WARNS_MASK (QC_SPC_WARNS | QC_INO_WARNS | QC_RT_SPC_WARNS) +#define QC_SPACE (1<<12) +#define QC_INO_COUNT (1<<13) +#define QC_RT_SPACE (1<<14) +#define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) + /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); @@ -329,8 +372,8 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); - int (*set_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); + int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); + int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index f23538a6e411..29e3455f7d41 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -98,9 +98,9 @@ int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); int dquot_transfer(struct inode *inode, struct iattr *iattr); -- cgit v1.2.3 From 3aa8793f751d4cfcaca886e75ab30dfb00cf1d88 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 28 Nov 2014 14:38:36 +0100 Subject: mmc: core: Initial support for MMC power sequences System on chip designs may specify a specific MMC power sequence. To successfully detect an (e)MMC/SD/SDIO card, that power sequence must be followed while initializing the card. To be able to handle these SOC specific power sequences, let's add a MMC power sequence interface. It provides the following functions to help the mmc core to deal with these power sequences. mmc_pwrseq_alloc() - Invoked from mmc_of_parse(), to initialize data. mmc_pwrseq_pre_power_on()- Invoked in the beginning of mmc_power_up(). mmc_pwrseq_post_power_on()- Invoked at the end in mmc_power_up(). mmc_pwrseq_power_off()- Invoked from mmc_power_off(). mmc_pwrseq_free() - Invoked from mmc_free_host(), to free data. Each MMC power sequence provider will be responsible to implement a set of callbacks. These callbacks mirrors the functions above. This patch adds the skeleton, following patches will extend the core of the MMC power sequence and add support for a specific simple MMC power sequence. Do note, since the mmc_pwrseq_alloc() is invoked from mmc_of_parse(), host drivers needs to make use of this API to enable the support for MMC power sequences. Moreover the MMC power sequence support depends on CONFIG_OF. Signed-off-by: Ulf Hansson Tested-by: Javier Martinez Canillas Reviewed-by: Javier Martinez Canillas --- include/linux/mmc/host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index b6bf718c3498..0c8cbe5d1550 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -195,6 +195,7 @@ struct mmc_context_info { }; struct regulator; +struct mmc_pwrseq; struct mmc_supply { struct regulator *vmmc; /* Card power supply */ @@ -206,6 +207,7 @@ struct mmc_host { struct device class_dev; int index; const struct mmc_host_ops *ops; + struct mmc_pwrseq *pwrseq; unsigned int f_min; unsigned int f_max; unsigned int f_init; -- cgit v1.2.3 From c3c87e770458aa004bd7ed3f29945ff436fd6511 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 Jan 2015 11:19:48 +0100 Subject: perf: Tighten (and fix) the grouping condition The fix from 9fc81d87420d ("perf: Fix events installation during moving group") was incomplete in that it failed to recognise that creating a group with events for different CPUs is semantically broken -- they cannot be co-scheduled. Furthermore, it leads to real breakage where, when we create an event for CPU Y and then migrate it to form a group on CPU X, the code gets confused where the counter is programmed -- triggered in practice as well by me via the perf fuzzer. Fix this by tightening the rules for creating groups. Only allow grouping of counters that can be co-scheduled in the same context. This means for the same task and/or the same cpu. Fixes: 9fc81d87420d ("perf: Fix events installation during moving group") Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150123125834.090683288@infradead.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f7a61ca4b39..664de5a4ec46 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -450,11 +450,6 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; -enum perf_event_context_type { - task_context, - cpu_context, -}; - /** * struct perf_event_context - event context structure * @@ -462,7 +457,6 @@ enum perf_event_context_type { */ struct perf_event_context { struct pmu *pmu; - enum perf_event_context_type type; /* * Protect the states of the events in the list, * nr_active, and the list: -- cgit v1.2.3 From 667a0a06c99d5291433b869ed35dabdd95ba1453 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 18 Dec 2014 14:48:15 +0000 Subject: mm: provide a find_special_page vma operation The optional find_special_page VMA operation is used to lookup the pages backing a VMA. This is useful in cases where the normal mechanisms for finding the page don't work. This is only called if the PTE is special. One use case is a Xen PV guest mapping foreign pages into userspace. In a Xen PV guest, the PTEs contain MFNs so get_user_pages() (for example) must do an MFN to PFN (M2P) lookup before it can get the page. For foreign pages (those owned by another guest) the M2P lookup returns the PFN as seen by the foreign guest (which would be completely the wrong page for the local guest). This cannot be fixed up improving the M2P lookup since one MFN may be mapped onto two or more pages so getting the right page is impossible given just the MFN. Signed-off-by: David Vrabel Acked-by: Andrew Morton --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 80fc92a49649..9269af7349fe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -290,6 +290,14 @@ struct vm_operations_struct { /* called by sys_remap_file_pages() to populate non-linear mapping */ int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, unsigned long size, pgoff_t pgoff); + + /* + * Called by vm_normal_page() for special PTEs to find the + * page for @addr. This is useful if the default behavior + * (using pte_page()) would not find the correct page. + */ + struct page *(*find_special_page)(struct vm_area_struct *vma, + unsigned long addr); }; struct mmu_gather; -- cgit v1.2.3 From d8ac3dd41aea245f65465449efc35dd3ac71e91d Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Mon, 5 Jan 2015 13:24:09 +0000 Subject: mm: add 'foreign' alias for the 'pinned' page flag The foreign page flag will be used by Xen guests to mark pages that have grant mappings of frames from other (foreign) guests. The foreign flag is an alias for the existing (Xen-specific) pinned flag. This is safe because pinned is only used on pages used for page tables and these cannot also be foreign. Signed-off-by: Jennifer Herbert Acked-by: Andrew Morton Signed-off-by: David Vrabel --- include/linux/page-flags.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e1f5fcd79792..5ed7bdaf22d5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -121,8 +121,12 @@ enum pageflags { PG_fscache = PG_private_2, /* page backed by cache */ /* XEN */ + /* Pinned in Xen as a read-only pagetable page. */ PG_pinned = PG_owner_priv_1, + /* Pinned as part of domain save (see xen_mm_pin_all()). */ PG_savepinned = PG_dirty, + /* Has a grant mapping of another (foreign) domain's page. */ + PG_foreign = PG_owner_priv_1, /* SLOB */ PG_slob_free = PG_private, @@ -215,6 +219,7 @@ __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, checked) /* Used by some filesystems */ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ +PAGEFLAG(Foreign, foreign); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) __SETPAGEFLAG(SwapBacked, swapbacked) -- cgit v1.2.3 From 6ea22486ba46bcb665de36514094d74575cd1330 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 28 Jan 2015 12:48:53 +0000 Subject: tracing: Add array printing helper If a trace event contains an array, there is currently no standard way to format this for text output. Drivers are currently hacking around this by a) local hacks that use the trace_seq functionailty directly, or b) just not printing that information. For fixed size arrays, formatting of the elements can be open-coded, but this gets cumbersome for arrays of non-trivial size. These approaches result in non-standard content of the event format description delivered to userspace, so userland tools needs to be taught to understand and parse each array printing method individually. This patch implements a __print_array() helper that tracepoint implementations can use instead of reinventing it. A simple C-style syntax is used to delimit the array and its elements {like,this}. So that the helpers can be used with large static arrays as well as dynamic arrays, they take a pointer and element count: they can be used with __get_dynamic_array() for use with dynamic arrays. Link: http://lkml.kernel.org/r/1422449335-8289-2-git-send-email-javi.merino@arm.com Cc: Ingo Molnar Signed-off-by: Dave Martin Signed-off-by: Javi Merino Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0bebb5c348b8..5aa4a9269547 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -44,6 +44,10 @@ const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); +const char *ftrace_print_array_seq(struct trace_seq *p, + const void *buf, int buf_len, + size_t el_size); + struct trace_iterator; struct trace_event; -- cgit v1.2.3 From 8c7dd8bce05345ca5fe249b64782e8feeb3b9259 Mon Sep 17 00:00:00 2001 From: James Ban Date: Wed, 28 Jan 2015 09:28:08 +0900 Subject: regulator: da9211: Add gpio control for enable/disable of buck This is a patch for adding gpio control about enable/disable of buck. Signed-off-by: James Ban Signed-off-by: Mark Brown --- include/linux/regulator/da9211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index d1d9d3849bdb..5dd65acc2a69 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -32,6 +32,7 @@ struct da9211_pdata { * 2 : 2 phase 2 buck */ int num_buck; + int gpio_ren[DA9211_MAX_REGULATORS]; struct device_node *reg_node[DA9211_MAX_REGULATORS]; struct regulator_init_data *init_data[DA9211_MAX_REGULATORS]; }; -- cgit v1.2.3 From 018d5ef2048fcab339467bcbebccf588c9bd2531 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 29 Jan 2015 08:30:29 +0900 Subject: ata: ahci_platform: fix owner module reference mismatch for scsi host The owner module reference of the ahci platform's scsi_host is initialized to libahci_platform's one, because these drivers use a scsi_host_template defined in libahci_platform. So these drivers can be unloaded even if the scsi device is being accessed. This fixes it by pushing the scsi_host_template from libahci_platform to all leaf drivers. The scsi_host_template is passed through a new argument of ahci_platform_init_host(). Signed-off-by: Akinobu Mita Signed-off-by: Tejun Heo Cc: Hans de Goede Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: linux-ide@vger.kernel.org Cc: linux-scsi@vger.kernel.org --- include/linux/ahci_platform.h | 4 +++- include/linux/libata.h | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index f65b33809170..a270f25ee7c7 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -21,6 +21,7 @@ struct device; struct ata_port_info; struct ahci_host_priv; struct platform_device; +struct scsi_host_template; int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); @@ -32,7 +33,8 @@ struct ahci_host_priv *ahci_platform_get_resources( struct platform_device *pdev); int ahci_platform_init_host(struct platform_device *pdev, struct ahci_host_priv *hpriv, - const struct ata_port_info *pi_template); + const struct ata_port_info *pi_template, + struct scsi_host_template *sht); int ahci_platform_suspend_host(struct device *dev); int ahci_platform_resume_host(struct device *dev); diff --git a/include/linux/libata.h b/include/linux/libata.h index 2d182413b1db..11beb4196c32 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1338,6 +1338,12 @@ extern const struct ata_port_operations ata_base_port_ops; extern const struct ata_port_operations sata_port_ops; extern struct device_attribute *ata_common_sdev_attrs[]; +/* + * All sht initializers (BASE, PIO, BMDMA, NCQ) must be instantiated + * by the edge drivers. Because the 'module' field of sht must be the + * edge driver's module reference, otherwise the driver can be unloaded + * even if the scsi_device is being accessed. + */ #define ATA_BASE_SHT(drv_name) \ .module = THIS_MODULE, \ .name = drv_name, \ -- cgit v1.2.3 From 17263905399471016cda6c1975044d14291c5ba5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 29 Jan 2015 08:30:30 +0900 Subject: ata: pata_platform: fix owner module reference mismatch for scsi host The owner module reference of the pata_of_platform's scsi_host is initialized to pata_platform's one, because pata_of_platform driver use a scsi_host_template defined in pata_platform. So this drivers can be unloaded even if the scsi device is being accessed. This fixes it by propagating the scsi_host_template to pata_of_platform driver. The scsi_host_template is passed through a new argument of __pata_platform_probe(). Signed-off-by: Akinobu Mita Signed-off-by: Tejun Heo Cc: Hans de Goede Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: linux-ide@vger.kernel.org Cc: linux-scsi@vger.kernel.org --- include/linux/ata_platform.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index 5c618a084225..619d9e78e644 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -10,12 +10,15 @@ struct pata_platform_info { unsigned int ioport_shift; }; +struct scsi_host_template; + extern int __pata_platform_probe(struct device *dev, struct resource *io_res, struct resource *ctl_res, struct resource *irq_res, unsigned int ioport_shift, - int __pio_mask); + int __pio_mask, + struct scsi_host_template *sht); /* * Marvell SATA private data -- cgit v1.2.3 From 61f552141c9c0e88b3fdc7046265781ffd8fa68a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 18 Jan 2015 16:45:42 +0100 Subject: ftrace: let notrace function attribute disable hotpatching if necessary gcc supports an s390 specific function attribute called "hotpatch". It can be used to specify the number of halfwords that shall be added before and after a function and which shall be filled with nops for runtime patching. s390 will use the hotpatch attribute for function tracing, therefore make sure that the notrace function attribute either disables the mcount call or in case of hotpatch nop generation. Acked-by: Steven Rostedt Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/linux/compiler.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d5ad7b1118fc..1ef679f4b88e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -54,7 +54,11 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include #endif +#ifdef CC_USING_HOTPATCH +#define notrace __attribute__((hotpatch(0,0))) +#else #define notrace __attribute__((no_instrument_function)) +#endif /* Intel compiler defines __GNUC__. So we will overwrite implementations * coming from above header files here -- cgit v1.2.3 From 3c313161353ad527de1a6ecde0f0d41700858fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 24 Jan 2015 18:47:20 +0100 Subject: bcma: detect SPROM revision 11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracting values from it is still unsupported, but at least we'll display some meaningful error now. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/ssb/ssb_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index f7b9100686c3..c0f707ac192b 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -173,6 +173,7 @@ #define SSB_SPROMSIZE_BYTES_R123 (SSB_SPROMSIZE_WORDS_R123 * sizeof(u16)) #define SSB_SPROMSIZE_BYTES_R4 (SSB_SPROMSIZE_WORDS_R4 * sizeof(u16)) #define SSB_SPROMSIZE_WORDS_R10 230 +#define SSB_SPROMSIZE_WORDS_R11 234 #define SSB_SPROM_BASE1 0x1000 #define SSB_SPROM_BASE31 0x0800 #define SSB_SPROM_REVISION 0x007E -- cgit v1.2.3 From b504075f5903b969a54ef3a6ae994c0872edb259 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 25 Jan 2015 11:11:14 +0100 Subject: bcma: add early_init function for PCIe core and move some fix into it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some PCIe core fixes that need to be applied before accessing SPROM, otherwise reading it may fail. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 0333e605ea0d..3f809ae372c4 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -223,6 +223,7 @@ struct bcma_drv_pci_host { struct bcma_drv_pci { struct bcma_device *core; + u8 early_setup_done:1; u8 setup_done:1; u8 hostmode:1; @@ -237,6 +238,7 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) +extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); -- cgit v1.2.3 From 8be08a39d498d5d93ff5149276e34ccb4ec3757f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 25 Jan 2015 13:41:19 +0100 Subject: bcma: implement host code support for PCIe Gen 2 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is stil incomplete, so we don't add PCI IDs of new devices yet. Purpose of this patch is to allow testing & adjusting rest of the code. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 1 + include/linux/bcma/bcma_regs.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index eb1c6a47b67f..994739da827f 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -318,6 +318,7 @@ struct bcma_bus { const struct bcma_host_ops *ops; enum bcma_hosttype hosttype; + bool host_is_pcie2; /* Used for BCMA_HOSTTYPE_PCI only */ union { /* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */ struct pci_dev *host_pci; diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index e64ae7bf80a1..ebd5c1fcdea4 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -64,6 +64,8 @@ #define BCMA_PCI_GPIO_XTAL 0x40 /* PCI config space GPIO 14 for Xtal powerup */ #define BCMA_PCI_GPIO_PLL 0x80 /* PCI config space GPIO 15 for PLL powerdown */ +#define BCMA_PCIE2_BAR0_WIN2 0x70 + /* SiliconBackplane Address Map. * All regions may not exist on all chips. */ -- cgit v1.2.3 From 0501be6429e4eb02f417ad83eacd84b8c57b0283 Mon Sep 17 00:00:00 2001 From: Alexey Skidanov Date: Thu, 29 Jan 2015 10:49:43 +0200 Subject: mmc: Resolve BKOPS compatability issue This patch is coming to fix compatibility issue of BKOPS_EN field of EXT_CSD. In eMMC-5.1, BKOPS_EN was changed, and now it has two operational bits: Bit 0 - MANUAL_EN Bit 1 - AUTO_EN In previous eMMC revisions, only Bit 0 was supported. Signed-off-by: Alexey Skidanov Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 2 +- include/linux/mmc/mmc.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4d69c00497bd..a6cf4c063e4e 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -83,7 +83,7 @@ struct mmc_ext_csd { bool hpi; /* HPI support bit */ unsigned int hpi_cmd; /* cmd used as HPI */ bool bkops; /* background support bit */ - bool bkops_en; /* background enable bit */ + bool man_bkops_en; /* manual bkops enable bit */ unsigned int data_sector_size; /* 512 bytes or 4KB */ unsigned int data_tag_unit_size; /* DATA TAG UNIT size */ unsigned int boot_ro_lock; /* ro lock support */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index fb97b5cc91cd..124f562118b8 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -427,6 +427,11 @@ struct _mmc_csd { */ #define EXT_CSD_BKOPS_LEVEL_2 0x2 +/* + * BKOPS modes + */ +#define EXT_CSD_MANUAL_BKOPS_MASK 0x01 + /* * MMC_SWITCH access modes */ -- cgit v1.2.3 From 33692f27597fcab536d7cbbcc8f52905133e4aa7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Jan 2015 10:51:32 -0800 Subject: vm: add VM_FAULT_SIGSEGV handling support The core VM already knows about VM_FAULT_SIGBUS, but cannot return a "you should SIGSEGV" error, because the SIGSEGV case was generally handled by the caller - usually the architecture fault handler. That results in lots of duplication - all the architecture fault handlers end up doing very similar "look up vma, check permissions, do retries etc" - but it generally works. However, there are cases where the VM actually wants to SIGSEGV, and applications _expect_ SIGSEGV. In particular, when accessing the stack guard page, libsigsegv expects a SIGSEGV. And it usually got one, because the stack growth is handled by that duplicated architecture fault handler. However, when the generic VM layer started propagating the error return from the stack expansion in commit fee7e49d4514 ("mm: propagate error from stack expansion even for guard page"), that now exposed the existing VM_FAULT_SIGBUS result to user space. And user space really expected SIGSEGV, not SIGBUS. To fix that case, we need to add a VM_FAULT_SIGSEGV, and teach all those duplicate architecture fault handlers about it. They all already have the code to handle SIGSEGV, so it's about just tying that new return value to the existing code, but it's all a bit annoying. This is the mindless minimal patch to do this. A more extensive patch would be to try to gather up the mostly shared fault handling logic into one generic helper routine, and long-term we really should do that cleanup. Just from this patch, you can generally see that most architectures just copied (directly or indirectly) the old x86 way of doing things, but in the meantime that original x86 model has been improved to hold the VM semaphore for shorter times etc and to handle VM_FAULT_RETRY and other "newer" things, so it would be a good idea to bring all those improvements to the generic case and teach other architectures about them too. Reported-and-tested-by: Takashi Iwai Tested-by: Jan Engelhardt Acked-by: Heiko Carstens # "s390 still compiles and boots" Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 80fc92a49649..dd5ea3016fc4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1070,6 +1070,7 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ +#define VM_FAULT_SIGSEGV 0x0040 #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ @@ -1078,8 +1079,9 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ - VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ + VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ + VM_FAULT_FALLBACK) /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) -- cgit v1.2.3 From 7866a621043fbaca3d7389e9b9f69dd1a2e5a855 Mon Sep 17 00:00:00 2001 From: Salam Noureddine Date: Tue, 27 Jan 2015 11:35:48 -0800 Subject: dev: add per net_device packet type chains When many pf_packet listeners are created on a lot of interfaces the current implementation using global packet type lists scales poorly. This patch adds per net_device packet type lists to fix this problem. The patch was originally written by Eric Biederman for linux-2.6.29. Tested on linux-3.16. Signed-off-by: "Eric W. Biederman" Signed-off-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 642d426a668f..3d37c6eb1732 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1514,6 +1514,8 @@ struct net_device { struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; + struct list_head ptype_all; + struct list_head ptype_specific; struct { struct list_head upper; -- cgit v1.2.3 From f262f28c147051e7aa6daaf4fb5996833ffadff4 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 26 Jan 2015 13:16:27 +0900 Subject: PM / devfreq: event: Add devfreq_event class This patch adds a new class in devfreq, devfreq_event, which provides raw data (e.g., memory bus utilization, GPU utilization) for devfreq governors. - devfreq_event device : Provides raw data for a governor of a devfreq device - devfreq device : Monitors device state and changes frequency/voltage of the device using the raw data from its devfreq_event device. A devfreq device dertermines performance states (normally the frequency and the voltage vlues) based on the results its designtated devfreq governor: e.g., ondemand, performance, powersave. In order to give such results required by a devfreq device, the devfreq governor requires data that indicates the performance requirement given to the devfreq device. The conventional (previous) implementatino of devfreq subsystem requires a devfreq device driver to implement its own mechanism to acquire performance requirement for its governor. However, there had been issues with such requirements: 1. Although performance requirement of such devices is usually acquired from common devices (PMU/PPMU), we do not have any abstract structure to represent them properly. 2. Such performance requirement devices (PMU/PPMU) are actual hardware pieces that may be represented by Device Tree directly while devfreq device itself is a virtual entity that are not considered to be represented by Device Tree according to Device Tree folks. In order to address such issues, a devferq_event device (represented by this patch) provides a template for device drivers representing performance monitoring unit, which gives the basic or raw data for preformance requirement, which in turn, is required by devfreq governors. The following description explains the feature of two kind of devfreq class: - devfreq class (existing) : devfreq consumer device use raw data from devfreq_event device for determining proper current system state and change voltage/frequency dynamically using various governors. - devfreq_event class (new) : Provide measured raw data to devfreq device for governor Cc: MyungJoo Ham Cc: Kyungmin Park Signed-off-by: Chanwoo Choi [Commit message rewritten & conflict resolved by MyungJoo] Signed-off-by: MyungJoo Ham --- include/linux/devfreq-event.h | 196 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 include/linux/devfreq-event.h (limited to 'include/linux') diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h new file mode 100644 index 000000000000..602fbbfcfeed --- /dev/null +++ b/include/linux/devfreq-event.h @@ -0,0 +1,196 @@ +/* + * devfreq-event: a framework to provide raw data and events of devfreq devices + * + * Copyright (C) 2014 Samsung Electronics + * Author: Chanwoo Choi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_DEVFREQ_EVENT_H__ +#define __LINUX_DEVFREQ_EVENT_H__ + +#include + +/** + * struct devfreq_event_dev - the devfreq-event device + * + * @node : Contain the devfreq-event device that have been registered. + * @dev : the device registered by devfreq-event class. dev.parent is + * the device using devfreq-event. + * @lock : a mutex to protect accessing devfreq-event. + * @enable_count: the number of enable function have been called. + * @desc : the description for devfreq-event device. + * + * This structure contains devfreq-event device information. + */ +struct devfreq_event_dev { + struct list_head node; + + struct device dev; + struct mutex lock; + u32 enable_count; + + const struct devfreq_event_desc *desc; +}; + +/** + * struct devfreq_event_data - the devfreq-event data + * + * @load_count : load count of devfreq-event device for the given period. + * @total_count : total count of devfreq-event device for the given period. + * each count may represent a clock cycle, a time unit + * (ns/us/...), or anything the device driver wants. + * Generally, utilization is load_count / total_count. + * + * This structure contains the data of devfreq-event device for polling period. + */ +struct devfreq_event_data { + unsigned long load_count; + unsigned long total_count; +}; + +/** + * struct devfreq_event_ops - the operations of devfreq-event device + * + * @enable : Enable the devfreq-event device. + * @disable : Disable the devfreq-event device. + * @reset : Reset all setting of the devfreq-event device. + * @set_event : Set the specific event type for the devfreq-event device. + * @get_event : Get the result of the devfreq-event devie with specific + * event type. + * + * This structure contains devfreq-event device operations which can be + * implemented by devfreq-event device drivers. + */ +struct devfreq_event_ops { + /* Optional functions */ + int (*enable)(struct devfreq_event_dev *edev); + int (*disable)(struct devfreq_event_dev *edev); + int (*reset)(struct devfreq_event_dev *edev); + + /* Mandatory functions */ + int (*set_event)(struct devfreq_event_dev *edev); + int (*get_event)(struct devfreq_event_dev *edev, + struct devfreq_event_data *edata); +}; + +/** + * struct devfreq_event_desc - the descriptor of devfreq-event device + * + * @name : the name of devfreq-event device. + * @driver_data : the private data for devfreq-event driver. + * @ops : the operation to control devfreq-event device. + * + * Each devfreq-event device is described with a this structure. + * This structure contains the various data for devfreq-event device. + */ +struct devfreq_event_desc { + const char *name; + void *driver_data; + + struct devfreq_event_ops *ops; +}; + +#if defined(CONFIG_PM_DEVFREQ_EVENT) +extern int devfreq_event_enable_edev(struct devfreq_event_dev *edev); +extern int devfreq_event_disable_edev(struct devfreq_event_dev *edev); +extern bool devfreq_event_is_enabled(struct devfreq_event_dev *edev); +extern int devfreq_event_set_event(struct devfreq_event_dev *edev); +extern int devfreq_event_get_event(struct devfreq_event_dev *edev, + struct devfreq_event_data *edata); +extern int devfreq_event_reset_event(struct devfreq_event_dev *edev); +extern struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( + struct device *dev, int index); +extern int devfreq_event_get_edev_count(struct device *dev); +extern struct devfreq_event_dev *devfreq_event_add_edev(struct device *dev, + struct devfreq_event_desc *desc); +extern int devfreq_event_remove_edev(struct devfreq_event_dev *edev); +extern struct devfreq_event_dev *devm_devfreq_event_add_edev(struct device *dev, + struct devfreq_event_desc *desc); +extern void devm_devfreq_event_remove_edev(struct device *dev, + struct devfreq_event_dev *edev); +static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev) +{ + return edev->desc->driver_data; +} +#else +static inline int devfreq_event_enable_edev(struct devfreq_event_dev *edev) +{ + return -EINVAL; +} + +static inline int devfreq_event_disable_edev(struct devfreq_event_dev *edev) +{ + return -EINVAL; +} + +static inline bool devfreq_event_is_enabled(struct devfreq_event_dev *edev) +{ + return false; +} + +static inline int devfreq_event_set_event(struct devfreq_event_dev *edev) +{ + return -EINVAL; +} + +static inline int devfreq_event_get_event(struct devfreq_event_dev *edev, + struct devfreq_event_data *edata) +{ + return -EINVAL; +} + +static inline int devfreq_event_reset_event(struct devfreq_event_dev *edev) +{ + return -EINVAL; +} + +static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( + struct device *dev, int index) +{ + return ERR_PTR(-EINVAL); +} + +static inline int devfreq_event_get_edev_count(struct device *dev) +{ + return -EINVAL; +} + +static inline struct devfreq_event_dev *devfreq_event_add_edev(struct device *dev, + struct devfreq_event_desc *desc) +{ + return ERR_PTR(-EINVAL); +} + +static inline int devfreq_event_remove_edev(struct devfreq_event_dev *edev) +{ + return -EINVAL; +} + +static inline struct devfreq_event_dev *devm_devfreq_event_add_edev( + struct device *dev, + struct devfreq_event_desc *desc) +{ + return ERR_PTR(-EINVAL); +} + +static inline void devm_devfreq_event_remove_edev(struct device *dev, + struct devfreq_event_dev *edev) +{ +} + +static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev) +{ + return NULL; +} +#endif /* CONFIG_PM_DEVFREQ_EVENT */ + +#endif /* __LINUX_DEVFREQ_EVENT_H__ */ -- cgit v1.2.3 From 38e478c4489a845a5e8baf7849c286af5fed5b66 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Oct 2014 15:56:21 +0200 Subject: quota: Split ->set_xstate callback into two Split ->set_xstate callback into two callbacks - one for turning quotas on (->quota_enable) and one for turning quotas off (->quota_disable). That way we don't have to pass quotactl command into the callback which seems cleaner. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 227f37f463c9..4da497b807c4 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -371,13 +371,14 @@ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); int (*quota_on_meta)(struct super_block *, int, int); int (*quota_off)(struct super_block *, int); + int (*quota_enable)(struct super_block *, unsigned int); + int (*quota_disable)(struct super_block *, unsigned int); int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); - int (*set_xstate)(struct super_block *, unsigned int, int); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); int (*rm_xquota)(struct super_block *, unsigned int); }; -- cgit v1.2.3 From d3b863248577504f6eecca2a464d6ddf86b71584 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Oct 2014 16:07:12 +0200 Subject: quota: Wire up ->quota_{enable,disable} callbacks into Q_QUOTA{ON,OFF} Make Q_QUOTAON / Q_QUOTAOFF quotactl call ->quota_enable / ->quota_disable callback when provided. To match current behavior of ocfs2 & ext4 we make these quotactls turn on / off quota enforcement for appropriate quota type. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quotaops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 29e3455f7d41..ff0b665591d0 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -386,4 +386,6 @@ static inline void dquot_release_reservation_block(struct inode *inode, __dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE); } +unsigned int qtype_enforce_flag(int type); + #endif /* _LINUX_QUOTAOPS_ */ -- cgit v1.2.3 From 3e2af67e66ff025796af1a8a1fcbb4236304f90c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 6 Oct 2014 18:40:51 +0200 Subject: quota: Add ->quota_{enable,disable} callbacks for VFS quotas Add functions which translate ->quota_enable / ->quota_disable calls into appropriate changes in VFS quota. This will enable filesystems supporting VFS quota files in system inodes to be controlled via Q_XQUOTA[ON|OFF] quotactls for better userspace compatibility. Also provide a vector for quotactl using these functions which can be used by filesystems with quota files stored in hidden system files. Signed-off-by: Jan Kara --- include/linux/quotaops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index ff0b665591d0..df73258cca47 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -166,6 +166,7 @@ static inline bool sb_has_quota_active(struct super_block *sb, int type) */ extern const struct dquot_operations dquot_operations; extern const struct quotactl_ops dquot_quotactl_ops; +extern const struct quotactl_ops dquot_quotactl_sysfile_ops; #else -- cgit v1.2.3 From aaa3daed156ff3c6acb28c8b18028f8b57d6c91b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Oct 2014 18:35:31 +0200 Subject: quota: Remove quota_on_meta callback There are no more users for quota_on_meta callback. Just remove it. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 4da497b807c4..0938159d65c8 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -369,7 +369,6 @@ struct qc_dqblk { /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); - int (*quota_on_meta)(struct super_block *, int, int); int (*quota_off)(struct super_block *, int); int (*quota_enable)(struct super_block *, unsigned int); int (*quota_disable)(struct super_block *, unsigned int); -- cgit v1.2.3 From b10a08194c2b615955dfab2300331a90ae9344c7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 9 Oct 2014 16:54:13 +0200 Subject: quota: Store maximum space limit in bytes Currently maximum space limit quota format supports is in blocks however since we store space limits in bytes, this is somewhat confusing. So store the maximum limit in bytes as well. Also rename the field to match the new unit and related inode field to match the new naming scheme. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 0938159d65c8..d534e8ed308a 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -216,8 +216,8 @@ struct mem_dqinfo { unsigned long dqi_flags; unsigned int dqi_bgrace; unsigned int dqi_igrace; - qsize_t dqi_maxblimit; - qsize_t dqi_maxilimit; + qsize_t dqi_max_spc_limit; + qsize_t dqi_max_ino_limit; void *dqi_priv; }; -- cgit v1.2.3 From d4bcef3fbe887ff93b58da4fcf6df1eee416e8fa Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 29 Jan 2015 20:37:07 +0900 Subject: net: Fix vlan_get_protocol for stacked vlan vlan_get_protocol() could not get network protocol if a skb has a 802.1ad vlan tag or multiple vlans, which caused incorrect checksum calculation in several drivers. Fix vlan_get_protocol() to retrieve network protocol instead of incorrect vlan protocol. As the logic is the same as skb_network_protocol(), create a common helper function __vlan_get_protocol() and call it from existing functions. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 60 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 515a35e2a48a..960e666c51e4 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -472,27 +472,59 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) /** * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query + * @type: first vlan protocol + * @depth: buffer to store length of eth and vlan tags in bytes * * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(const struct sk_buff *skb) +static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, + int *depth) { - __be16 protocol = 0; - - if (vlan_tx_tag_present(skb) || - skb->protocol != cpu_to_be16(ETH_P_8021Q)) - protocol = skb->protocol; - else { - __be16 proto, *protop; - protop = skb_header_pointer(skb, offsetof(struct vlan_ethhdr, - h_vlan_encapsulated_proto), - sizeof(proto), &proto); - if (likely(protop)) - protocol = *protop; + unsigned int vlan_depth = skb->mac_len; + + /* if type is 802.1Q/AD then the header should already be + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at + * ETH_HLEN otherwise + */ + if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (vlan_depth) { + if (WARN_ON(vlan_depth < VLAN_HLEN)) + return 0; + vlan_depth -= VLAN_HLEN; + } else { + vlan_depth = ETH_HLEN; + } + do { + struct vlan_hdr *vh; + + if (unlikely(!pskb_may_pull(skb, + vlan_depth + VLAN_HLEN))) + return 0; + + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; + } while (type == htons(ETH_P_8021Q) || + type == htons(ETH_P_8021AD)); } - return protocol; + if (depth) + *depth = vlan_depth; + + return type; +} + +/** + * vlan_get_protocol - get protocol EtherType. + * @skb: skbuff to query + * + * Returns the EtherType of the packet, regardless of whether it is + * vlan encapsulated (normal or hardware accelerated) or not. + */ +static inline __be16 vlan_get_protocol(struct sk_buff *skb) +{ + return __vlan_get_protocol(skb, skb->protocol, NULL); } static inline void vlan_set_encap_proto(struct sk_buff *skb, -- cgit v1.2.3 From 448c7f3830ca283e485aa943279acea6bde8b270 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Sun, 1 Feb 2015 11:25:14 -0800 Subject: Input: MT - add support for balanced slot assignment Some devices are not fast enough to differentiate between a fast-moving contact and a new contact. This problem cannot be fully resolved because information is truly missing, but it is possible to safe-guard against obvious mistakes by restricting movement with a maximum displacement. The new problem formulation for dmax > 0 cannot benefit from the speedup for positive definite matrices, but since the convergence is faster, the result is about the same. For a handful of contacts, the latency difference is truly negligible. Suggested-by: Benjamin Tissoires Tested-by: Benjamin Tissoires Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- include/linux/input/mt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index f583ff639776..d7188de4db96 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -119,7 +119,8 @@ struct input_mt_pos { }; int input_mt_assign_slots(struct input_dev *dev, int *slots, - const struct input_mt_pos *pos, int num_pos); + const struct input_mt_pos *pos, int num_pos, + int dmax); int input_mt_get_slot_by_key(struct input_dev *dev, int key); -- cgit v1.2.3 From 00845eb968ead28007338b2bb852b8beef816583 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Feb 2015 12:23:32 -0800 Subject: sched: don't cause task state changes in nested sleep debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8eb23b9f35aa ("sched: Debug nested sleeps") added code to report on nested sleep conditions, which we generally want to avoid because the inner sleeping operation can re-set the thread state to TASK_RUNNING, but that will then cause the outer sleep loop not actually sleep when it calls schedule. However, that's actually valid traditional behavior, with the inner sleep being some fairly rare case (like taking a sleeping lock that normally doesn't actually need to sleep). And the debug code would actually change the state of the task to TASK_RUNNING internally, which makes that kind of traditional and working code not work at all, because now the nested sleep doesn't just sometimes cause the outer one to not block, but will cause it to happen every time. In particular, it will cause the cardbus kernel daemon (pccardd) to basically busy-loop doing scheduling, converting a laptop into a heater, as reported by Bruno Prémont. But there may be other legacy uses of that nested sleep model in other drivers that are also likely to never get converted to the new model. This fixes both cases: - don't set TASK_RUNNING when the nested condition happens (note: even if WARN_ONCE() only _warns_ once, the return value isn't whether the warning happened, but whether the condition for the warning was true. So despite the warning only happening once, the "if (WARN_ON(..))" would trigger for every nested sleep. - in the cases where we knowingly disable the warning by using "sched_annotate_sleep()", don't change the task state (that is used for all core scheduling decisions), instead use '->task_state_change' that is used for the debugging decision itself. (Credit for the second part of the fix goes to Oleg Nesterov: "Can't we avoid this subtle change in behaviour DEBUG_ATOMIC_SLEEP adds?" with the suggested change to use 'task_state_change' as part of the test) Reported-and-bisected-by: Bruno Prémont Tested-by: Rafael J Wysocki Acked-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner , Cc: Ilya Dryomov , Cc: Mike Galbraith Cc: Ingo Molnar Cc: Peter Hurley , Cc: Davidlohr Bueso , Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5449d2f4a1ef..64ce58bee6f5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -176,7 +176,7 @@ extern int _cond_resched(void); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -# define sched_annotate_sleep() __set_current_state(TASK_RUNNING) +# define sched_annotate_sleep() (current->task_state_change = 0) #else static inline void ___might_sleep(const char *file, int line, int preempt_offset) { } -- cgit v1.2.3 From fd979c0132074856975a6e79bc2226b99435ec5b Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:45:57 -0800 Subject: perf: provide sysfs_show for struct perf_pmu_events_attr (struct perf_pmu_events_attr) is defined in include/linux/perf_event.h, but the only "show" for it is in x86 and contains x86 specific stuff. Make a generic one for those of us who are just using the event_str. Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Signed-off-by: Michael Ellerman --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 486e84ccb1f9..58f59bdb590b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -897,6 +897,9 @@ struct perf_pmu_events_attr { const char *event_str; }; +ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); + #define PMU_EVENT_ATTR(_name, _var, _id, _show) \ static struct perf_pmu_events_attr _var = { \ .attr = __ATTR(_name, 0444, _show, NULL), \ -- cgit v1.2.3 From f0405b816149665393cc62b9e5082fc2d79714df Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:45:58 -0800 Subject: perf: add PMU_EVENT_ATTR_STRING() helper Helper for constructing static struct perf_pmu_events_attr s. Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Signed-off-by: Michael Ellerman --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 58f59bdb590b..1d3631448b91 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -906,6 +906,13 @@ static struct perf_pmu_events_attr _var = { \ .id = _id, \ }; +#define PMU_EVENT_ATTR_STRING(_name, _var, _str) \ +static struct perf_pmu_events_attr _var = { \ + .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = _str, \ +}; + #define PMU_FORMAT_ATTR(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ -- cgit v1.2.3 From aafb3e98b27977148c8c86499684f8f5c3decfbb Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:11 -0800 Subject: netdev: introduce new NETIF_F_HW_SWITCH_OFFLOAD feature flag for switch device offloads This is a high level feature flag for all switch asic offloads switch drivers set this flag on switch ports. Logical devices like bridge, bonds, vxlans can inherit this flag from their slaves/ports. The patch also adds the flag to NETIF_F_ONE_FOR_ALL, so that it gets propagated to the upperdevices (bridges and bonds). Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 8e30685affeb..7d59dc6ab789 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -66,6 +66,7 @@ enum { NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ + NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ /* * Add your fresh new feature above and remember to update @@ -124,6 +125,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ @@ -159,7 +161,9 @@ enum { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ + NETIF_F_HW_SWITCH_OFFLOAD) + /* * If one device doesn't support one of these features, then disable it * for all in netdev_increment_features. -- cgit v1.2.3 From add511b38266aa10c1079f9248854e6a415c4dc2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:12 -0800 Subject: bridge: add flags argument to ndo_bridge_setlink and ndo_bridge_dellink bridge flags are needed inside ndo_bridge_setlink/dellink handlers to avoid another call to parse IFLA_AF_SPEC inside these handlers This is used later in this series Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3d37c6eb1732..16251e96e6aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1154,13 +1154,15 @@ struct net_device_ops { int idx); int (*ndo_bridge_setlink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, -- cgit v1.2.3 From 15d0f5ea348b9c4e6d41df294dde38a56a39c7bf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Feb 2015 10:07:59 -0700 Subject: Make super_blocks and sb_lock static The only user outside of fs/super.c is gone now Signed-off-by: Al Viro Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 65d02de342e1..2f717baefdf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1183,8 +1183,6 @@ struct mm_struct; #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ -extern struct list_head super_blocks; -extern spinlock_t sb_lock; /* Possible states of 'frozen' field */ enum { -- cgit v1.2.3 From 6cae0a4648c0db2a74efb816cd2ce84390c90480 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 16 Aug 2014 13:31:51 +0200 Subject: nfs: add LAYOUT_TYPE_MAX enum value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gives us a nice upper bound for later use in nfѕd. Signed-off-by: Christoph Hellwig --- include/linux/nfs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 022b761dbf0a..8a3589c2542c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -516,6 +516,7 @@ enum pnfs_layouttype { LAYOUT_NFSV4_1_FILES = 1, LAYOUT_OSD2_OBJECTS = 2, LAYOUT_BLOCK_VOLUME = 3, + LAYOUT_TYPE_MAX }; /* used for both layout return and recall */ -- cgit v1.2.3 From 11afe9f76e121e960445deee5b7f26f0787a1990 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jan 2015 19:17:03 +0100 Subject: fs: add FL_LAYOUT lease type This (ab-)uses the file locking code to allow filesystems to recall outstanding pNFS layouts on a file. This new lease type is similar but not quite the same as FL_DELEG. A FL_LAYOUT lease can always be granted, an a per-filesystem lock (XFS iolock for the initial implementation) ensures not FL_LAYOUT leases granted when we would need to recall them. Also included are changes that allow multiple outstanding read leases of different types on the same file as long as they have a differnt owner. This wasn't a problem until now as nfsd never set FL_LEASE leases, and no one else used FL_DELEG leases, but given that nfsd will also issues FL_LAYOUT leases we will have to handle it now. Signed-off-by: Christoph Hellwig --- include/linux/fs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ddd2fa7cefd3..84740145f835 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -875,6 +875,7 @@ static inline struct file *get_file(struct file *f) #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ +#define FL_LAYOUT 2048 /* outstanding pNFS layout */ /* * Special return value from posix_lock_file() and vfs_lock_file() for @@ -2037,6 +2038,16 @@ static inline int break_deleg_wait(struct inode **delegated_inode) return ret; } +static inline int break_layout(struct inode *inode, bool wait) +{ + smp_mb(); + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + return __break_lease(inode, + wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, + FL_LAYOUT); + return 0; +} + #else /* !CONFIG_FILE_LOCKING */ static inline int locks_mandatory_locked(struct file *file) { @@ -2092,6 +2103,11 @@ static inline int break_deleg_wait(struct inode **delegated_inode) return 0; } +static inline int break_layout(struct inode *inode, bool wait) +{ + return 0; +} + #endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ -- cgit v1.2.3 From 9cf514ccfacb301f3b1b4509a8ce25dffad55880 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 May 2014 13:11:59 +0200 Subject: nfsd: implement pNFS operations Add support for the GETDEVICEINFO, LAYOUTGET, LAYOUTCOMMIT and LAYOUTRETURN NFSv4.1 operations, as well as backing code to manage outstanding layouts and devices. Layout management is very straight forward, with a nfs4_layout_stateid structure that extends nfs4_stid to manage layout stateids as the top-level structure. It is linked into the nfs4_file and nfs4_client structures like the other stateids, and contains a linked list of layouts that hang of the stateid. The actual layout operations are implemented in layout drivers that are not part of this commit, but will be added later. The worst part of this commit is the management of the pNFS device IDs, which suffers from a specification that is not sanely implementable due to the fact that the device-IDs are global and not bound to an export, and have a small enough size so that we can't store the fsid portion of a file handle, and must never be reused. As we still do need perform all export authentication and validation checks on a device ID passed to GETDEVICEINFO we are caught between a rock and a hard place. To work around this issue we add a new hash that maps from a 64-bit integer to a fsid so that we can look up the export to authenticate against it, a 32-bit integer as a generation that we can bump when changing the device, and a currently unused 32-bit integer that could be used in the future to handle more than a single device per export. Entries in this hash table are never deleted as we can't reuse the ids anyway, and would have a severe lifetime problem anyway as Linux export structures are temporary structures that can go away under load. Parts of the XDR data, structures and marshaling/unmarshaling code, as well as many concepts are derived from the old pNFS server implementation from Andy Adamson, Benny Halevy, Dean Hildebrand, Marc Eshel, Fred Isaman, Mike Sager, Ricardo Labiaga and many others. Signed-off-by: Christoph Hellwig --- include/linux/nfs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8a3589c2542c..bc10d687f2ce 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -411,6 +411,7 @@ enum lock_type4 { #define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) #define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) +#define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) -- cgit v1.2.3 From 366e41d9774d7010cb63112b6db2fce6dc7809c0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:13 -0500 Subject: ipv6: pull cork initialization into its own function. Pull IPv6 cork initialization into its own function that can be re-used. IPv6 specific cork data did not have an explicit data structure. This patch creats eone so that just ipv6 cork data can be as arguemts. Also, since IPv6 tries to save the flow label into inet_cork_full tructure, pass the full cork. Adjust ip6_cork_release() to take cork data structures. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/ipv6.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2805062c013f..4d5169f5d7d1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -125,6 +125,12 @@ struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; +struct inet6_cork { + struct ipv6_txoptions *opt; + u8 hop_limit; + u8 tclass; +}; + /** * struct ipv6_pinfo - ipv6 private area * @@ -217,11 +223,7 @@ struct ipv6_pinfo { struct ipv6_txoptions *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; - struct { - struct ipv6_txoptions *opt; - u8 hop_limit; - u8 tclass; - } cork; + struct inet6_cork cork; }; /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ -- cgit v1.2.3 From 5a2e87b16875f9b83b7e9494cf1fce8e17dc764a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 2 Feb 2015 15:18:42 +0200 Subject: net/mlx4_core: Fix kernel Oops (mem corruption) when working with more than 80 VFs Commit de966c592802 (net/mlx4_core: Support more than 64 VFs) was meant to allow up to 126 VFs. However, due to leaving MLX4_MFUNC_MAX too low, using more than 80 VFs resulted in memory corruptions (and Oopses) when more than 80 VFs were requested. In addition, the number of slaves was left too high. This commit fixes these issues. Fixes: de966c592802 ("net/mlx4_core: Support more than 64 VFs") Signed-off-by: Jack Morgenstein Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 25c791e295fd..5f3a9aa7225d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -97,7 +97,7 @@ enum { MLX4_MAX_NUM_PF = 16, MLX4_MAX_NUM_VF = 126, MLX4_MAX_NUM_VF_P_PORT = 64, - MLX4_MFUNC_MAX = 80, + MLX4_MFUNC_MAX = 128, MLX4_MAX_EQ_NUM = 1024, MLX4_MFUNC_EQ_NUM = 4, MLX4_MFUNC_MAX_EQES = 8, -- cgit v1.2.3 From 3e87523897e18a3e17fc8955ed795188be737ff1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 2 Feb 2015 09:39:02 -0500 Subject: sched/wait: Remove might_sleep() from wait_event_cmd() The patch e22b886a8a43 ("sched/wait: Add might_sleep() checks") introduced a bug in the raid5 subsystem. The function raid5_quiesce() (and resize_stripes()) uses the 'cmd' part to release and acquire a spinlock (so we call the sleep primitives in atomic context), and therefore we cannot do the might_sleep() check. Remove it. Fixes: e22b886a8a43 ("sched/wait: Add might_sleep() checks") Signed-off-by: Mikulas Patocka Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1502020935580.13510@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Ingo Molnar --- include/linux/wait.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 2232ed16635a..37423e0e1379 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -363,7 +363,6 @@ do { \ */ #define wait_event_cmd(wq, condition, cmd1, cmd2) \ do { \ - might_sleep(); \ if (condition) \ break; \ __wait_event_cmd(wq, condition, cmd1, cmd2); \ -- cgit v1.2.3 From f0b66a2cf68ed3613fe72fe01ed309f998e2bbb3 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 3 Feb 2015 09:29:52 -0600 Subject: PCI: Add pci_device_to_OF_node() stub for !CONFIG_OF Add a stub for pci_device_to_OF_node() so drivers don't need to use #ifdef CONFIG_OF around calls to it. Signed-off-by: Kevin Hao Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 360a966a97a5..fbb5795a8d86 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1847,6 +1847,8 @@ static inline void pci_set_of_node(struct pci_dev *dev) { } static inline void pci_release_of_node(struct pci_dev *dev) { } static inline void pci_set_bus_of_node(struct pci_bus *bus) { } static inline void pci_release_bus_of_node(struct pci_bus *bus) { } +static inline struct device_node * +pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; } #endif /* CONFIG_OF */ #ifdef CONFIG_EEH -- cgit v1.2.3 From 344d635b9a41b19837ccf8083a99ea688027019c Mon Sep 17 00:00:00 2001 From: Brad Griffis Date: Tue, 3 Feb 2015 11:44:12 -0800 Subject: Input: ti_am335x_tsc - remove udelay in interrupt handler TSC interrupt handler had udelay to avoid reporting of false pen-up interrupt to user space. This patch implements workaround suggesting in Advisory 1.0.31 of silicon errata for am335x, thus eliminating udelay and touchscreen lag. This also improves performance of touchscreen and eliminates sudden jump of cursor at touch release. IDLECONFIG and CHARGECONFIG registers are to be configured with same values in order to eliminate false pen-up events. This workaround may result in false pen-down to be detected, hence considerable charge step delay needs to be added. The charge delay is set to 0xB000 (in terms of ADC clock cycles) by default. TSC steps are disabled at the end of every sampling cycle and EOS bit is set. Once the EOS bit is set, the TSC steps need to be re-enabled to begin next sampling cycle. Signed-off-by: Brad Griffis [vigneshr@ti.com: Ported the patch from v3.12 to v3.19rc1] Signed-off-by: Vignesh R Signed-off-by: Dmitry Torokhov --- include/linux/mfd/ti_am335x_tscadc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index e2e70053470e..3f4e994ace2b 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -52,6 +52,7 @@ /* IRQ enable */ #define IRQENB_HW_PEN BIT(0) +#define IRQENB_EOS BIT(1) #define IRQENB_FIFO0THRES BIT(2) #define IRQENB_FIFO0OVRRUN BIT(3) #define IRQENB_FIFO0UNDRFLW BIT(4) @@ -107,7 +108,7 @@ /* Charge delay */ #define CHARGEDLY_OPEN_MASK (0x3FFFF << 0) #define CHARGEDLY_OPEN(val) ((val) << 0) -#define CHARGEDLY_OPENDLY CHARGEDLY_OPEN(1) +#define CHARGEDLY_OPENDLY CHARGEDLY_OPEN(0x400) /* Control register */ #define CNTRLREG_TSCSSENB BIT(0) -- cgit v1.2.3 From a49170b552423a3e85fc4f0d778c707402ee4863 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 2 Feb 2015 10:42:58 +0800 Subject: ACPI: Return translation offset when parsing ACPI address space resources Change function acpi_dev_resource_address_space() and acpi_dev_resource_ext_address_space() to return address space translation offset. It's based on a patch from Yinghai Lu . Signed-off-by: Jiang Liu Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d459cd17b477..be9eaee8f4ae 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -285,12 +285,17 @@ extern int pnpacpi_disabled; #define PXM_INVAL (-1) +struct resource_win { + struct resource res; + resource_size_t offset; +}; + bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_address_space(struct acpi_resource *ares, - struct resource *res); + struct resource_win *win); bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, - struct resource *res); + struct resource_win *win); unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); -- cgit v1.2.3 From 93286f4798590e711aa395503401f8632fb74f9a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 2 Feb 2015 10:43:00 +0800 Subject: ACPI: Add field offset to struct resource_list_entry Add field offset to struct resource_list_entry to host address space translation offset so it could be used to represent bridge resources. Signed-off-by: Jiang Liu Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index be9eaee8f4ae..21dac3cb62d2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -303,6 +303,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource_list_entry { struct list_head node; struct resource res; + resource_size_t offset; }; void acpi_dev_free_resource_list(struct list_head *list); -- cgit v1.2.3 From 62d1141ff34e35de496ba06491c8e854b23b3f3e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 2 Feb 2015 10:43:01 +0800 Subject: ACPI: Introduce helper function acpi_dev_filter_resource_type() Introduce helper function acpi_dev_filter_resource_type(), which may be used by acpi_dev_get_resources() to filer out resource based on resource type. Signed-off-by: Jiang Liu Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 21dac3cb62d2..e818decb631f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -310,6 +310,14 @@ void acpi_dev_free_resource_list(struct list_head *list); int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, int (*preproc)(struct acpi_resource *, void *), void *preproc_data); +int acpi_dev_filter_resource_type(struct acpi_resource *ares, + unsigned long types); + +static inline int acpi_dev_filter_resource_type_cb(struct acpi_resource *ares, + void *arg) +{ + return acpi_dev_filter_resource_type(ares, (unsigned long)arg); +} int acpi_check_resource_conflict(const struct resource *res); -- cgit v1.2.3 From c1dbe2fbb33ef425a81e1a7cffd17c113c87cdbc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 27 Jan 2015 21:13:39 +0100 Subject: PM / Domains: Remove reference counting for the generic_pm_domain_data The reference counting was needed when genpd supported PM domain device callbacks. Since this option has been removed, let's also remove the reference counting of the struct generic_pm_domain_data. Signed-off-by: Ulf Hansson Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ed607760fc20..e160a0bba28d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -114,7 +114,6 @@ struct generic_pm_domain_data { struct gpd_timing_data td; struct notifier_block nb; struct mutex lock; - unsigned int refcount; int need_restore; }; -- cgit v1.2.3 From c0356db7d1b66840882744cbd9d9c5960b2d88c7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 27 Jan 2015 21:13:42 +0100 Subject: PM / Domains: Eliminate the mutex for the generic_pm_domain_data While adding devices to their PM domains, dev_pm_qos_add_notifier() was invoked while allocating the generic_pm_domain_data for the device. Since the generic_pm_domain_data's device pointer will be assigned after allocation, the ->genpd_dev_pm_qos_notifier() callback could be called prior having a valid pointer to the device. Similar scenario existed while removing a device from a genpd. To cope with these scenarios a mutex was used to protect the pointer to the device. By re-order the sequence for when dev_pm_qos_add|remove_notifier() are invoked, we make sure the ->genpd_dev_pm_qos_notifier() callback are always called with a valid device pointer available. In this way, we eliminate the need for protecting the pointer and thus we can remove the mutex from the struct generic_pm_domain_data. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index e160a0bba28d..080e778118ba 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -113,7 +113,6 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; - struct mutex lock; int need_restore; }; -- cgit v1.2.3 From 1e95e3b2da424db68d0a465273f1901a990c6277 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 29 Jan 2015 18:39:05 +0100 Subject: PM: Convert dev_pm_put_subsys_data() into a void function Clients using the dev_pm_put_subsys_data() API isn't interested of a return value. They care only of decreasing a reference to the device's pm_subsys_data. So, let's convert the API to a void function, which anyway seems like reasonable thing to do. Signed-off-by: Ulf Hansson Acked-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 8b5976364619..e2f1be6dd9dd 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -597,7 +597,7 @@ struct dev_pm_info { extern void update_pm_runtime_accounting(struct device *dev); extern int dev_pm_get_subsys_data(struct device *dev); -extern int dev_pm_put_subsys_data(struct device *dev); +extern void dev_pm_put_subsys_data(struct device *dev); /* * Power domains provide callbacks that are executed during system suspend, -- cgit v1.2.3 From 851c63e3b381fdbf5aca1a797f37d8606b5588d2 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Mon, 5 Jan 2015 15:43:39 +0000 Subject: of: Fix brace position for struct of_device_id definition Currently it is not easy to grep for the definition of struct of_device_id. This is trivially fixed by moving the brace to the right place. Signed-off-by: Daniel Thompson Cc: Grant Likely Cc: Rob Herring Signed-off-by: Rob Herring --- include/linux/mod_devicetable.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 745def862580..bbf85d612be5 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -220,8 +220,7 @@ struct serio_device_id { /* * Struct used for matching a device */ -struct of_device_id -{ +struct of_device_id { char name[32]; char type[32]; char compatible[128]; -- cgit v1.2.3 From 4c946d9c11d173c2ea6b9081b248f8072e6b46f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:52:04 -0500 Subject: vmci: propagate msghdr all way down to __qp_memcpy_to_queue() Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- include/linux/vmw_vmci_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index 5691f752ce8f..63df3a2a8ce5 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -74,7 +74,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, int mode); ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, - void *iov, size_t iov_size, int mode); + struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, -- cgit v1.2.3 From af2b040e470b470bfc881981db3c796072853eae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 21:44:24 -0500 Subject: rxrpc: switch rxrpc_send_data() to iov_iter primitives Convert skb_add_data() to iov_iter; allows to get rid of the explicit messing with iovec in its only caller - skb_add_data() will keep advancing ->msg_iter for us, so there's no need to similate that manually. Signed-off-by: Al Viro --- include/linux/skbuff.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 85ab7d72b54c..9a8bafee1b67 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2484,19 +2484,18 @@ static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) } static inline int skb_add_data(struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { const int off = skb->len; if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), - copy, 0, &err); - if (!err) { + __wsum csum = 0; + if (csum_and_copy_from_iter(skb_put(skb, copy), copy, + &csum, from) == copy) { skb->csum = csum_block_add(skb->csum, csum, off); return 0; } - } else if (!copy_from_user(skb_put(skb, copy), from, copy)) + } else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy) return 0; __skb_trim(skb, off); -- cgit v1.2.3 From 21226abb4e9f14d88238964d89b279e461ddc30c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:48:29 -0500 Subject: net: switch memcpy_fromiovec()/memcpy_fromiovecend() users to copy_from_iter() That takes care of the majority of ->sendmsg() instances - most of them via memcpy_to_msg() or assorted getfrag() callbacks. One place where we still keep memcpy_fromiovecend() is tipc - there we potentially read the same data over and over; separate patch, that... Signed-off-by: Al Viro --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9a8bafee1b67..b349c96dc80a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2692,8 +2692,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - /* XXX: stripping const */ - return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len); + return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) -- cgit v1.2.3 From 31a25fae85956e3a9c778141d29e5e803fb0b124 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:53:57 -0500 Subject: net: bury net/core/iovec.c - nothing in there is used anymore Signed-off-by: Al Viro --- include/linux/socket.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 6e49a14365dc..5c19cba34dce 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -318,13 +318,6 @@ struct ucred { /* IPX options */ #define IPX_TYPE 1 -extern int csum_partial_copy_fromiovecend(unsigned char *kdata, - struct iovec *iov, - int offset, - unsigned int len, __wsum *csump); -extern unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs); - extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); -- cgit v1.2.3 From aad9a1cec7dcd1d45809b64643fce37061b17788 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 14:49:01 -0500 Subject: vhost: switch vhost get_indirect() to iov_iter, kill memcpy_fromiovec() Cc: Michael S. Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- include/linux/uio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 1c5e453f7ea9..af3439f4ebf2 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -135,7 +135,6 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, -- cgit v1.2.3 From ba7438aed924133df54a60e4cd5499d359bcf2a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 15:51:28 -0500 Subject: vhost: don't bother copying iovecs in handle_rx(), kill memcpy_toiovecend() Cc: Michael S. Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- include/linux/uio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index af3439f4ebf2..02bd8a92038a 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -137,7 +137,4 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct io int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); -int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, - int offset, int len); - #endif -- cgit v1.2.3 From 57dd8a0735aabff4862025cf64ad94da3d80e620 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 16:03:43 -0500 Subject: vhost: vhost_scsi_handle_vq() should just use copy_from_user() it has just verified that it asks no more than the length of the first segment of iovec. And with that the last user of stuff in lib/iovec.c is gone. RIP. Cc: Michael S. Tsirkin Cc: Nicholas A. Bellinger Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- include/linux/uio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 02bd8a92038a..3e0cb4ea3905 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -135,6 +135,4 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len); #endif -- cgit v1.2.3 From 44fc0e5eec00db5fba748803c95920098089c4cc Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 30 Jan 2015 13:14:36 +0200 Subject: sched/wait: Introduce wait_on_bit_timeout() Add a new wait_on_bit_timeout() helper, basically the same as wait_on_bit() except that it also takes a 'timeout' parameter. All the building blocks like bit_wait_timeout() and out_of_line_wait_on_bit_timeout() are already in place so the addition is rather simple. Signed-off-by: Johan Hedberg Signed-off-by: Peter Zijlstra (Intel) Cc: davem@davemloft.net Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1422616476-2917-2-git-send-email-johan.hedberg@gmail.com Signed-off-by: Ingo Molnar --- include/linux/wait.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 37423e0e1379..537d58eea8a0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -989,6 +989,32 @@ wait_on_bit_io(void *word, int bit, unsigned mode) mode); } +/** + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * @timeout: timeout, in jiffies + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), except also takes a + * timeout parameter. + * + * Returned value will be zero if the bit was cleared before the + * @timeout elapsed, or non-zero if the @timeout elapsed or process + * received a signal and the mode permitted wakeup on that signal. + */ +static inline int +wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_timeout(word, bit, + bit_wait_timeout, + mode, timeout); +} + /** * wait_on_bit_action - wait for a bit to be cleared * @word: the word being waited on, a kernel virtual address -- cgit v1.2.3 From 2fde4f94e0a9531251e706fa57131b51b0df042e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Jan 2015 15:01:54 +0000 Subject: perf: Decouple unthrottling and rotating Currently the adjusments made as part of perf_event_task_tick() use the percpu rotation lists to iterate over any active PMU contexts, but these are not used by the context rotation code, having been replaced by separate (per-context) hrtimer callbacks. However, some manipulation of the rotation lists (i.e. removal of contexts) has remained in perf_rotate_context(). This leads to the following issues: * Contexts are not always removed from the rotation lists. Removal of PMUs which have been placed in rotation lists, but have not been removed by a hrtimer callback can result in corruption of the rotation lists (when memory backing the context is freed). This has been observed to result in hangs when PMU drivers built as modules are inserted and removed around the creation of events for said PMUs. * Contexts which do not require rotation may be removed from the rotation lists as a result of a hrtimer, and will not be considered by the unthrottling code in perf_event_task_tick. This patch fixes the issue by updating the rotation ist when events are scheduled in/out, ensuring that each rotation list stays in sync with the HW state. As each event holds a refcount on the module of its PMU, this ensures that when a PMU module is unloaded none of its CPU contexts can be in a rotation list. By maintaining a list of perf_event_contexts rather than perf_event_cpu_contexts, we don't need separate paths to handle the cpu and task contexts, which also makes the code a little simpler. As the rotation_list variables are not used for rotation, these are renamed to active_ctx_list, which better matches their current function. perf_pmu_rotate_{start,stop} are renamed to perf_pmu_ctx_{activate,deactivate}. Reported-by: Johannes Jensen Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Will Deacon Cc: Arnaldo Carvalho de Melo Cc: Fengguang Wu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150129134511.GR17721@leverpostej Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 216653466a67..5cad0e6f3552 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -469,6 +469,7 @@ struct perf_event_context { */ struct mutex mutex; + struct list_head active_ctx_list; struct list_head pinned_groups; struct list_head flexible_groups; struct list_head event_list; @@ -519,7 +520,6 @@ struct perf_cpu_context { int exclusive; struct hrtimer hrtimer; ktime_t hrtimer_interval; - struct list_head rotation_list; struct pmu *unique_pmu; struct perf_cgroup *cgrp; }; -- cgit v1.2.3 From 12cf89b550d13eb7cb86ef182bd6c04345a33a1f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 3 Feb 2015 16:45:18 -0600 Subject: livepatch: rename config to CONFIG_LIVEPATCH Rename CONFIG_LIVE_PATCHING to CONFIG_LIVEPATCH to make the naming of the config and the code more consistent. Signed-off-by: Josh Poimboeuf Reviewed-by: Jingoo Han Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index f14c6fb262b4..95023fd8b00d 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -24,7 +24,7 @@ #include #include -#if IS_ENABLED(CONFIG_LIVE_PATCHING) +#if IS_ENABLED(CONFIG_LIVEPATCH) #include @@ -128,6 +128,6 @@ extern int klp_unregister_patch(struct klp_patch *); extern int klp_enable_patch(struct klp_patch *); extern int klp_disable_patch(struct klp_patch *); -#endif /* CONFIG_LIVE_PATCHING */ +#endif /* CONFIG_LIVEPATCH */ #endif /* _LINUX_LIVEPATCH_H_ */ -- cgit v1.2.3 From 509102760da3a21831e763560ba4715760e3fbda Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 4 Feb 2015 11:45:28 +0100 Subject: regulator: Fix build breakage on !REGULATOR Add missing stubs for regulator_suspend_prepare() and regulator_suspend_finish() to fix exynos_defconfig build without REGULATOR: arch/arm/mach-exynos/built-in.o: In function `exynos_suspend_finish': arch/arm/mach-exynos/suspend.c:537: undefined reference to `regulator_suspend_finish' arch/arm/mach-exynos/built-in.o: In function `exynos_suspend_prepare': arch/arm/mach-exynos/suspend.c:520: undefined reference to `regulator_suspend_prepare' make: *** [vmlinux] Error 1 Signed-off-by: Krzysztof Kozlowski Reported-by: Joerg Roedel Reported-by: Marek Szyprowski Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 0b08d05d470b..b07562e082c4 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -191,15 +191,22 @@ struct regulator_init_data { void *driver_data; /* core does not touch this */ }; -int regulator_suspend_prepare(suspend_state_t state); -int regulator_suspend_finish(void); - #ifdef CONFIG_REGULATOR void regulator_has_full_constraints(void); +int regulator_suspend_prepare(suspend_state_t state); +int regulator_suspend_finish(void); #else static inline void regulator_has_full_constraints(void) { } +static inline int regulator_suspend_prepare(suspend_state_t state) +{ + return 0; +} +static inline int regulator_suspend_finish(void) +{ + return 0; +} #endif #endif -- cgit v1.2.3 From 9064bf3c3cdf92f6b9ac6634ff570dedf0035992 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 3 Feb 2015 17:03:35 -0600 Subject: spi: spi-pxa2xx: only include mach/dma.h for legacy DMA Move the include of mach/dma.h to the legacy PXA DMA code where it is used. This enables building spi-pxa2xx on ARM64. Signed-off-by: Rob Herring Signed-off-by: Mark Brown --- include/linux/spi/pxa2xx_spi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index d5a316550177..46d8fa942631 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -57,7 +57,6 @@ struct pxa2xx_spi_chip { #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) #include -#include extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info); -- cgit v1.2.3 From 2bd82484bb4c5db1d5dc983ac7c409b2782e0154 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Feb 2015 23:48:24 -0800 Subject: xps: fix xps for stacked devices A typical qdisc setup is the following : bond0 : bonding device, using HTB hierarchy eth1/eth2 : slaves, multiqueue NIC, using MQ + FQ qdisc XPS allows to spread packets on specific tx queues, based on the cpu doing the send. Problem is that dequeues from bond0 qdisc can happen on random cpus, due to the fact that qdisc_run() can dequeue a batch of packets. CPUA -> queue packet P1 on bond0 qdisc, P1->ooo_okay=1 CPUA -> queue packet P2 on bond0 qdisc, P2->ooo_okay=0 CPUB -> dequeue packet P1 from bond0 enqueue packet on eth1/eth2 CPUC -> dequeue packet P2 from bond0 enqueue packet on eth1/eth2 using sk cache (ooo_okay is 0) get_xps_queue() then might select wrong queue for P1, since current cpu might be different than CPUA. P2 might be sent on the old queue (stored in sk->sk_tx_queue_mapping), if CPUC runs a bit faster (or CPUB spins a bit on qdisc lock) Effect of this bug is TCP reorders, and more generally not optimal TX queue placement. (A victim bulk flow can be migrated to the wrong TX queue for a while) To fix this, we have to record sender cpu number the first time dev_queue_xmit() is called for one tx skb. We can union napi_id (used on receive path) and sender_cpu, granted we clear sender_cpu in skb_scrub_packet() (credit to Willem for this union idea) Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Nandita Dukkipati Cc: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 85ab7d72b54c..2748ff639144 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -626,8 +626,11 @@ struct sk_buff { __u32 hash; __be16 vlan_proto; __u16 vlan_tci; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int napi_id; +#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) + union { + unsigned int napi_id; + unsigned int sender_cpu; + }; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; -- cgit v1.2.3 From dcdc8994697faa789669c3fdaca1a8bc27a8f356 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 2 Feb 2015 16:07:34 -0800 Subject: net: add skb functions to process remote checksum offload This patch adds skb_remcsum_process and skb_gro_remcsum_process to perform the appropriate adjustments to the skb when receiving remote checksum offload. Updated vxlan and gue to use these functions. Tested: Ran TCP_RR and TCP_STREAM netperf for VXLAN and GUE, did not see any change in performance. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ include/linux/skbuff.h | 21 +++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 16251e96e6aa..1347ac50d2af 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2318,6 +2318,21 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset) +{ + __wsum delta; + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); +} + + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2748ff639144..5405dfe02572 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3099,6 +3099,27 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +/* Update skbuf and packet to reflect the remote checksum offload operation. + * When called, ptr indicates the starting point for skb->csum when + * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete + * here, skb_postpull_rcsum is done so skb->csum start is ptr. + */ +static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset) +{ + __wsum delta; + + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { + __skb_checksum_complete(skb); + skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); + } + + delta = remcsum_adjust(ptr, skb->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); +} + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) -- cgit v1.2.3 From 61bd3857ff2c7daf756d49b41e6277bbdaa8f789 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:29 +0200 Subject: net/core: Add event for a change in slave state Add event which provides an indication on a change in the state of a bonding slave. The event handler should cast the pointer to the appropriate type (struct netdev_bonding_info) in order to get the full info about the slave. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1347ac50d2af..ce784d5018e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats { #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 +#define NETDEV_BONDING_INFO 0x0019 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); +struct netdev_bonding_info { + ifslave slave; + ifbond master; +}; + +struct netdev_notifier_bonding_info { + struct netdev_notifier_info info; /* must be first */ + struct netdev_bonding_info bonding_info; +}; + +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info); + static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { -- cgit v1.2.3 From 59e14e325066be49b49b6c2503337c69a9ee29fc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:32 +0200 Subject: net/mlx4_core: Port aggregation low level interface Implement the hardware interface required for port aggregation. 1. Disable RX port check on receive - don't perform a validity check that matches to QP's port and the port where the packet is received. 2. Virtual to physical port remap - configure virtual to physical port mapping. Port remap capability for virtual functions. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 7 +++++++ include/linux/mlx4/device.h | 10 +++++++++- include/linux/mlx4/qp.h | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index ae95adc78509..7b6d4e9ff603 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -71,6 +71,7 @@ enum { /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, + MLX4_CMD_VIRT_PORT_MAP = 0x5c, MLX4_CMD_GET_OP_REQ = 0x59, /* TPT commands */ @@ -170,6 +171,12 @@ enum { MLX4_CMD_TIME_CLASS_C = 60000, }; +enum { + /* virtual to physical port mapping opcode modifiers */ + MLX4_GET_PORT_VIRT2PHY = 0x0, + MLX4_SET_PORT_VIRT2PHY = 0x1, +}; + enum { MLX4_MAILBOX_SIZE = 4096, MLX4_ACCESS_MEM_ALIGN = 256, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c95d659a39f2..d9afd99dde39 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -201,7 +201,8 @@ enum { MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, - MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20 + MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, + MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21 }; enum { @@ -253,9 +254,14 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; +enum { + MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP +}; + enum mlx4_event { MLX4_EVENT_TYPE_COMP = 0x00, MLX4_EVENT_TYPE_PATH_MIG = 0x01, @@ -1378,6 +1384,8 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 467ccdf94c98..2bbc62aa818a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -96,6 +96,7 @@ enum { MLX4_QP_BIT_RRE = 1 << 15, MLX4_QP_BIT_RWE = 1 << 14, MLX4_QP_BIT_RAE = 1 << 13, + MLX4_QP_BIT_FPP = 1 << 3, MLX4_QP_BIT_RIC = 1 << 4, }; -- cgit v1.2.3 From 53f33ae295a5098f12218da1400f55ad7df7447c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:33 +0200 Subject: net/mlx4_core: Port aggregation upper layer interface Supply interface functions to bond and unbond ports of a mlx4 internal interfaces. Example for such an interface is the one registered by the mlx4 IB driver under RoCE. There are 1. Functions to go in/out to/from bonded mode 2. Function to remap virtual ports to physical ports The bond_mutex prevents simultaneous access to data that keep status of the device in bonded mode. The upper mlx4 interface marks to the mlx4 core module that they want to be subject for such bonding by setting the MLX4_INTFF_BONDING flag. Interface which goes to/from bonded mode is re-created. The mlx4 Ethernet driver does not set this flag when registering the interface, the IB driver does. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + include/linux/mlx4/driver.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d9afd99dde39..977b0b164431 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -70,6 +70,7 @@ enum { MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_OLD_REG_MAC = 1 << 6, + MLX4_FLAG_BONDED = 1 << 7 }; enum { diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 022055c8fb26..9553a73d2049 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -49,6 +49,10 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; +enum { + MLX4_INTFF_BONDING = 1 << 0 +}; + struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); @@ -57,11 +61,26 @@ struct mlx4_interface { void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; enum mlx4_protocol protocol; + int flags; }; int mlx4_register_interface(struct mlx4_interface *intf); void mlx4_unregister_interface(struct mlx4_interface *intf); +int mlx4_bond(struct mlx4_dev *dev); +int mlx4_unbond(struct mlx4_dev *dev); +static inline int mlx4_is_bonded(struct mlx4_dev *dev) +{ + return !!(dev->flags & MLX4_FLAG_BONDED); +} + +struct mlx4_port_map { + u8 port1; + u8 port2; +}; + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); + void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); static inline u64 mlx4_mac_to_u64(u8 *addr) -- cgit v1.2.3 From f2dba9c6ff0d9a515b4c3f1b037cd65c8b2a868c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Feb 2015 07:33:23 +1100 Subject: rhashtable: Introduce rhashtable_walk_* Some existing rhashtable users get too intimate with it by walking the buckets directly. This prevents us from easily changing the internals of rhashtable. This patch adds the helpers rhashtable_walk_init/exit/start/next/stop which will replace these custom walkers. They are meant to be usable for both procfs seq_file walks as well as walking by a netlink dump. The iterator structure should fit inside a netlink dump cb structure, with at least one element to spare. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e0337844358e..58851275fed9 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,6 +18,7 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H +#include #include #include #include @@ -111,6 +112,7 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping + * @walkers: List of active walkers * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -121,9 +123,36 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; + struct list_head walkers; bool being_destroyed; }; +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @resize: Resize event occured + */ +struct rhashtable_walker { + struct list_head list; + bool resize; +}; + +/** + * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * @ht: Table to iterate through + * @p: Current pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhashtable_walker *walker; + unsigned int slot; + unsigned int skip; +}; + static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) { return NULLS_MARKER(ht->p.nulls_base + hash); @@ -179,6 +208,12 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht, bool (*compare)(void *, void *), void *arg); +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); +void rhashtable_walk_exit(struct rhashtable_iter *iter); +int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); +void *rhashtable_walk_next(struct rhashtable_iter *iter); +void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); + void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ -- cgit v1.2.3 From 7fbc1067f06098c6b674e672fbb17e758fcc9402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Oct 2013 10:32:35 +0100 Subject: exportfs: add methods for block layout exports Add three methods to allow exporting pnfs block layout volumes: - get_uuid: get a filesystem unique signature exposed to clients - map_blocks: map and if nessecary allocate blocks for a layout - commit_blocks: commit blocks in a layout once the client is done with them For now we stick the external pnfs block layout interfaces into s_export_op to avoid mixing them up with the internal interface between the NFS server and the layout drivers. Once we've fully internalized the latter interface we can redecide if these methods should stay in s_export_ops. Signed-off-by: Christoph Hellwig --- include/linux/exportfs.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 41b223a59a63..fa05e04c5531 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -4,6 +4,7 @@ #include struct dentry; +struct iattr; struct inode; struct super_block; struct vfsmount; @@ -180,6 +181,21 @@ struct fid { * get_name is not (which is possibly inconsistent) */ +/* types of block ranges for multipage write mappings. */ +#define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */ +#define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */ +#define IOMAP_MAPPED 0x03 /* blocks allocated @blkno */ +#define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ + +#define IOMAP_NULL_BLOCK -1LL /* blkno is not valid */ + +struct iomap { + sector_t blkno; /* first sector of mapping */ + loff_t offset; /* file offset of mapping, bytes */ + u64 length; /* length of mapping, bytes */ + int type; /* type of mapping */ +}; + struct export_operations { int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent); @@ -191,6 +207,13 @@ struct export_operations { struct dentry *child); struct dentry * (*get_parent)(struct dentry *child); int (*commit_metadata)(struct inode *inode); + + int (*get_uuid)(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); + int (*map_blocks)(struct inode *inode, loff_t offset, + u64 len, struct iomap *iomap, + bool write, u32 *device_generation); + int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, + int nr_iomaps, struct iattr *iattr); }; extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, -- cgit v1.2.3 From 90e97820619dc912b52cc9d103272819d8b51259 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 5 Feb 2015 13:44:43 +0800 Subject: resources: Move struct resource_list_entry from ACPI into resource core Currently ACPI, PCI and pnp all implement the same resource list management with different data structure. We need to transfer from one data structure into another when passing resources from one subsystem into another subsystem. So move struct resource_list_entry from ACPI into resource core and rename it as resource_entry, then it could be reused by different subystems and avoid the data structure conversion. Introduce dedicated header file resource_ext.h instead of embedding it into ioport.h to avoid header file inclusion order issues. Signed-off-by: Jiang Liu Acked-by: Vinod Koul Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 12 +------ include/linux/resource_ext.h | 77 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 11 deletions(-) create mode 100644 include/linux/resource_ext.h (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e818decb631f..e53822148b6a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -27,6 +27,7 @@ #include #include /* for struct resource */ +#include #include #include @@ -285,11 +286,6 @@ extern int pnpacpi_disabled; #define PXM_INVAL (-1) -struct resource_win { - struct resource res; - resource_size_t offset; -}; - bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_address_space(struct acpi_resource *ares, @@ -300,12 +296,6 @@ unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); -struct resource_list_entry { - struct list_head node; - struct resource res; - resource_size_t offset; -}; - void acpi_dev_free_resource_list(struct list_head *list); int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, int (*preproc)(struct acpi_resource *, void *), diff --git a/include/linux/resource_ext.h b/include/linux/resource_ext.h new file mode 100644 index 000000000000..e2bf63d881d4 --- /dev/null +++ b/include/linux/resource_ext.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2015, Intel Corporation + * Author: Jiang Liu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef _LINUX_RESOURCE_EXT_H +#define _LINUX_RESOURCE_EXT_H +#include +#include +#include +#include + +/* Represent resource window for bridge devices */ +struct resource_win { + struct resource res; /* In master (CPU) address space */ + resource_size_t offset; /* Translation offset for bridge */ +}; + +/* + * Common resource list management data structure and interfaces to support + * ACPI, PNP and PCI host bridge etc. + */ +struct resource_entry { + struct list_head node; + struct resource *res; /* In master (CPU) address space */ + resource_size_t offset; /* Translation offset for bridge */ + struct resource __res; /* Default storage for res */ +}; + +extern struct resource_entry * +resource_list_create_entry(struct resource *res, size_t extra_size); +extern void resource_list_free(struct list_head *head); + +static inline void resource_list_add(struct resource_entry *entry, + struct list_head *head) +{ + list_add(&entry->node, head); +} + +static inline void resource_list_add_tail(struct resource_entry *entry, + struct list_head *head) +{ + list_add_tail(&entry->node, head); +} + +static inline void resource_list_del(struct resource_entry *entry) +{ + list_del(&entry->node); +} + +static inline void resource_list_free_entry(struct resource_entry *entry) +{ + kfree(entry); +} + +static inline void +resource_list_destroy_entry(struct resource_entry *entry) +{ + resource_list_del(entry); + resource_list_free_entry(entry); +} + +#define resource_list_for_each_entry(entry, list) \ + list_for_each_entry((entry), (list), node) + +#define resource_list_for_each_entry_safe(entry, tmp, list) \ + list_for_each_entry_safe((entry), (tmp), (list), node) + +#endif /* _LINUX_RESOURCE_EXT_H */ -- cgit v1.2.3 From 14d76b68f2819a1d0b50236a7e9e9f2ea69869d9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 5 Feb 2015 13:44:44 +0800 Subject: PCI: Use common resource list management code instead of private implementation Use common resource list management data structure and interfaces instead of private implementation. Signed-off-by: Jiang Liu Acked-by: Will Deacon Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/pci.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 9603094ed59b..faa60fa26314 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -397,16 +398,10 @@ static inline int pci_channel_offline(struct pci_dev *pdev) return (pdev->error_state != pci_channel_io_normal); } -struct pci_host_bridge_window { - struct list_head list; - struct resource *res; /* host bridge aperture (CPU address) */ - resource_size_t offset; /* bus address + offset = CPU address */ -}; - struct pci_host_bridge { struct device dev; struct pci_bus *bus; /* root bus */ - struct list_head windows; /* pci_host_bridge_windows */ + struct list_head windows; /* resource_entry */ void (*release_fn)(struct pci_host_bridge *); void *release_data; }; -- cgit v1.2.3 From ecf5636dcd59cd5508641f995cc4c2bafedbb995 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 5 Feb 2015 13:44:48 +0800 Subject: ACPI: Add interfaces to parse IOAPIC ID for IOAPIC hotplug We need to parse APIC ID for IOAPIC registration for IOAPIC hotplug. ACPI _MAT method and MADT table are used to figure out IOAPIC ID, just like parsing CPU APIC ID for CPU hotplug. [ tglx: Fixed docbook comment ] Signed-off-by: Yinghai Lu Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Borislav Petkov Cc: Len Brown Link: http://lkml.kernel.org/r/1414387308-27148-8-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e53822148b6a..24c7aa8b1d20 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -152,6 +152,10 @@ int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); +#endif + int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); -- cgit v1.2.3 From a05d59a5673339ef6936d6940cdf68172ce75b9f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 6 Feb 2015 14:30:50 -0500 Subject: tracing: Add condition check to RCU lockdep checks The trace_tlb_flush() tracepoint can be called when a CPU is going offline. When a CPU is offline, RCU is no longer watching that CPU and since the tracepoint is protected by RCU, it must not be called. To prevent the tlb_flush tracepoint from being called when the CPU is offline, it was converted to a TRACE_EVENT_CONDITION where the condition checks if the CPU is online before calling the tracepoint. Unfortunately, this was not enough to stop lockdep from complaining about it. Even though the RCU protected code of the tracepoint will never be called, the condition is hidden within the tracepoint, and even though the condition prevents RCU code from being called, the lockdep checks are outside the tracepoint (this is to test tracepoints even when they are not enabled). Even though tracepoints should be checked to be RCU safe when they are not enabled, the condition should still be considered when checking RCU. Link: http://lkml.kernel.org/r/CA+icZUUGiGDoL5NU8RuxKzFjoLjEKRtUWx=JB8B9a0EQv-eGzQ@mail.gmail.com Fixes: 3a630178fd5f "tracing: generate RCU warnings even when tracepoints are disabled" Cc: stable@vger.kernel.org # 3.18+ Acked-by: Dave Hansen Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index e08e21e5f601..c72851328ca9 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -173,7 +173,7 @@ extern void syscall_unregfunc(void); TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond),,); \ - if (IS_ENABLED(CONFIG_LOCKDEP)) { \ + if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ rcu_read_lock_sched_notrace(); \ rcu_dereference_sched(__tracepoint_##name.funcs);\ rcu_read_unlock_sched_notrace(); \ -- cgit v1.2.3 From a9b2c06dbef48ed31cff1764c5ce824829106f4f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:39 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_request_sock In the SYN_RECV state, where the TCP connection is represented by tcp_request_sock, we now rate-limit SYNACKs in response to a client's retransmitted SYNs: we do not send a SYNACK in response to client SYN if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a SYNACK in response to a client's retransmitted SYN. This allows the vast majority of legitimate client connections to proceed unimpeded, even for the most aggressive platforms, iOS and MacOS, which actually retransmit SYNs 1-second intervals for several times in a row. They use SYN RTO timeouts following the progression: 1,1,1,1,1,2,4,8,16,32. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 67309ece0772..bcc828d3b9b9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -115,6 +115,7 @@ struct tcp_request_sock { u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ + u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# * after data-in-SYN. -- cgit v1.2.3 From f2b2c582e82429270d5818fbabe653f4359d7024 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:40 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_sock Ensure that in state ESTABLISHED, where the connection is represented by a tcp_sock, we rate limit dupacks in response to incoming packets (a) with TCP timestamps that fail PAWS checks, or (b) with sequence numbers or ACK numbers that are out of the acceptable window. We do not send a dupack in response to out-of-window packets if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a dupack in response to an out-of-window packet. There is already a similar (although global) rate-limiting mechanism for "challenge ACKs". When deciding whether to send a challence ACK, we first consult the new per-connection rate limit, and then the global rate limit. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcc828d3b9b9..66d85a80a1ec 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -153,6 +153,7 @@ struct tcp_sock { u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 tsoffset; /* timestamp offset */ -- cgit v1.2.3 From 4fb17a6091674f469e8ac85dc770fbf9a9ba7cc8 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:41 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_timewait_sock Ensure that in state FIN_WAIT2 or TIME_WAIT, where the connection is represented by a tcp_timewait_sock, we rate limit dupacks in response to incoming packets (a) with TCP timestamps that fail PAWS checks, or (b) with sequence numbers that are out of the acceptable window. We do not send a dupack in response to out-of-window packets if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a dupack in response to an out-of-window packet. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 66d85a80a1ec..1a7adb411647 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,6 +342,10 @@ struct tcp_timewait_sock { u32 tw_rcv_wnd; u32 tw_ts_offset; u32 tw_ts_recent; + + /* The time we sent the last out-of-window ACK: */ + u32 tw_last_oow_ack_time; + long tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; -- cgit v1.2.3 From 096a4cfa5807aa89c78ce12309c0b1c10cf88184 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 6 Feb 2015 18:54:19 +0100 Subject: net: fix a typo in skb_checksum_validate_zero_check Remove trailing underscore. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 111e665455c3..1bb36edb66b9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3072,7 +3072,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) #define skb_checksum_validate_zero_check(skb, proto, check, \ compute_pseudo) \ - __skb_checksum_validate_(skb, proto, true, true, check, compute_pseudo) + __skb_checksum_validate(skb, proto, true, true, check, compute_pseudo) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) -- cgit v1.2.3 From 567e4b79731c352a17d73c483959f795d3593e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Feb 2015 12:59:01 -0800 Subject: net: rfs: add hash collision detection Receive Flow Steering is a nice solution but suffers from hash collisions when a mix of connected and unconnected traffic is received on the host, when flow hash table is populated. Also, clearing flow in inet_release() makes RFS not very good for short lived flows, as many packets can follow close(). (FIN , ACK packets, ...) This patch extends the information stored into global hash table to not only include cpu number, but upper part of the hash value. I use a 32bit value, and dynamically split it in two parts. For host with less than 64 possible cpus, this gives 6 bits for the cpu number, and 26 (32-6) bits for the upper part of the hash. Since hash bucket selection use low order bits of the hash, we have a full hash match, if /proc/sys/net/core/rps_sock_flow_entries is big enough. If the hash found in flow table does not match, we fallback to RPS (if it is enabled for the rxqueue). This means that a packet for an non connected flow can avoid the IPI through a unrelated/victim CPU. This also means we no longer have to clear the table at socket close time, and this helps short lived flows performance. Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce784d5018e0..ab3b7cef4638 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,39 +644,39 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high order bits + * of flow hash, lower part is cpu number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - unsigned int mask; - u16 ents[0]; + u32 mask; + u32 ents[0]; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ - ((_num) * sizeof(u16))) +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff +extern u32 rps_cpu_mask; +extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; + static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { if (table && hash) { - unsigned int cpu, index = hash & table->mask; + unsigned int index = hash & table->mask; + u32 val = hash & ~rps_cpu_mask; /* We only give a hint, preemption can change cpu under us */ - cpu = raw_smp_processor_id(); + val |= raw_smp_processor_id(); - if (table->ents[index] != cpu) - table->ents[index] = cpu; + if (table->ents[index] != val) + table->ents[index] = val; } } -static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) - table->ents[hash & table->mask] = RPS_NO_CPU; -} - -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); -- cgit v1.2.3 From 93c1af6ca94c1e763efba76a127b5c135e3d23a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Feb 2015 20:39:13 -0800 Subject: net:rfs: adjust table size checking Make sure root user does not try something stupid. Also make sure mask field in struct rps_sock_flow_table does not share a cache line with the potentially often dirtied flow table. Signed-off-by: Eric Dumazet Fixes: 567e4b79731c ("net: rfs: add hash collision detection") Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab3b7cef4638..d115256ed5a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -653,7 +653,8 @@ struct rps_dev_flow_table { */ struct rps_sock_flow_table { u32 mask; - u32 ents[0]; + + u32 ents[0] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) -- cgit v1.2.3 From 35f05dabf95ac3ebc4c15bafd6833f7a3046e66f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 8 Feb 2015 11:49:34 +0200 Subject: IB/mlx4: Reset flow support for IB kernel ULPs The driver exposes interfaces that directly relate to HW state. Upon fatal error, consumers of these interfaces (ULPs) that rely on completion of all their posted work-request could hang, thereby introducing dependencies in shutdown order. To prevent this from happening, we manage the relevant resources (CQs, QPs) that are used by the device. Upon a fatal error, we now generate simulated completions for outstanding WQEs that were not completed at the time the HW was reset. It includes invoking the completion event handler for all involved CQs so that the ULPs will poll those CQs. When polled we return simulated CQEs with IB_WC_WR_FLUSH_ERR return code enabling ULPs to clean up their resources and not wait forever for completions upon receiving remove_one. The above change requires an extra check in the data path to make sure that when device is in error state, the simulated CQEs will be returned and no further WQEs will be posted. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c116cb02475c..e4ebff7e9d02 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -689,6 +689,8 @@ struct mlx4_cq { void (*comp)(struct mlx4_cq *); void *priv; } tasklet_ctx; + int reset_notify_added; + struct list_head reset_notify; }; struct mlx4_qp { -- cgit v1.2.3 From 3cf385713460eb2bb4cb7ceb8ed89833b00b594b Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Tue, 6 Jan 2015 11:15:11 +0100 Subject: ARM: 8256/1: driver coamba: add device binding path 'driver_override' As already demonstrated with PCI [1] and the platform bus [2], a driver_override property in sysfs can be used to bypass the id matching of a device to a AMBA driver. This can be used by VFIO to bind to any AMBA device requested by the user. [1] http://lists-archives.com/linux-kernel/28030441-pci-introduce-new-device-binding-path-using-pci_dev-driver_override.html [2] https://www.redhat.com/archives/libvir-list/2014-April/msg00382.html Signed-off-by: Antonios Motakis Reviewed-by: Kim Phillips Signed-off-by: Russell King --- include/linux/amba/bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 0ab5f8e0dea2..50fc66868402 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -33,6 +33,7 @@ struct amba_device { struct clk *pclk; unsigned int periphid; unsigned int irq[AMBA_NR_IRQS]; + char *driver_override; }; struct amba_driver { -- cgit v1.2.3 From 6ee8e25fc3e916193bce4ebb43d5439e1e2144ab Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2015 14:08:32 -0800 Subject: fsnotify: fix handling of renames in audit Commit e9fd702a58c4 ("audit: convert audit watches to use fsnotify instead of inotify") broke handling of renames in audit. Audit code wants to update inode number of an inode corresponding to watched name in a directory. When something gets renamed into a directory to a watched name, inotify previously passed moved inode to audit code however new fsnotify code passes directory inode where the change happened. That confuses audit and it starts watching parent directory instead of a file in a directory. This can be observed for example by doing: cd /tmp touch foo bar auditctl -w /tmp/foo touch foo mv bar foo touch foo In audit log we see events like: type=CONFIG_CHANGE msg=audit(1423563584.155:90): auid=1000 ses=2 op="updated rules" path="/tmp/foo" key=(null) list=4 res=1 ... type=PATH msg=audit(1423563584.155:91): item=2 name="bar" inode=1046884 dev=08:0 2 mode=0100644 ouid=0 ogid=0 rdev=00:00 nametype=DELETE type=PATH msg=audit(1423563584.155:91): item=3 name="foo" inode=1046842 dev=08:0 2 mode=0100644 ouid=0 ogid=0 rdev=00:00 nametype=DELETE type=PATH msg=audit(1423563584.155:91): item=4 name="foo" inode=1046884 dev=08:0 2 mode=0100644 ouid=0 ogid=0 rdev=00:00 nametype=CREATE ... and that's it - we see event for the first touch after creating the audit rule, we see events for rename but we don't see any event for the last touch. However we start seeing events for unrelated stuff happening in /tmp. Fix the problem by passing moved inode as data in the FS_MOVED_FROM and FS_MOVED_TO events instead of the directory where the change happens. This doesn't introduce any new problems because noone besides audit_watch.c cares about the passed value: fs/notify/fanotify/fanotify.c cares only about FSNOTIFY_EVENT_PATH events. fs/notify/dnotify/dnotify.c doesn't care about passed 'data' value at all. fs/notify/inotify/inotify_fsnotify.c uses 'data' only for FSNOTIFY_EVENT_PATH. kernel/audit_tree.c doesn't care about passed 'data' at all. kernel/audit_watch.c expects moved inode as 'data'. Fixes: e9fd702a58c49db ("audit: convert audit watches to use fsnotify instead of inotify") Signed-off-by: Jan Kara Cc: Paul Moore Cc: Eric Paris Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fsnotify.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1c804b057fb1..7ee1774edee5 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -101,8 +101,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, new_dir_mask |= FS_ISDIR; } - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); + fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, + fs_cookie); + fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name, + fs_cookie); if (target) fsnotify_link_count(target); -- cgit v1.2.3 From ccaafd7fd039aebc9359a9799f8558b01f1c2adc Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 10 Feb 2015 14:09:35 -0800 Subject: mm: don't use compound_head() in virt_to_head_page() compound_head() is implemented with assumption that there would be race condition when checking tail flag. This assumption is only true when we try to access arbitrary positioned struct page. The situation that virt_to_head_page() is called is different case. We call virt_to_head_page() only in the range of allocated pages, so there is no race condition on tail flag. In this case, we don't need to handle race condition and we can reduce overhead slightly. This patch implements compound_head_fast() which is similar with compound_head() except tail flag race handling. And then, virt_to_head_page() uses this optimized function to improve performance. I saw 1.8% win in a fast-path loop over kmem_cache_alloc/free, (14.063 ns -> 13.810 ns) if target object is on tail page. Signed-off-by: Joonsoo Kim Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index dd5ea3016fc4..2c6fd3c5424a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -446,6 +446,12 @@ static inline struct page *compound_head_by_tail(struct page *tail) return tail; } +/* + * Since either compound page could be dismantled asynchronously in THP + * or we access asynchronously arbitrary positioned struct page, there + * would be tail flag race. To handle this race, we should call + * smp_rmb() before checking tail flag. compound_head_by_tail() did it. + */ static inline struct page *compound_head(struct page *page) { if (unlikely(PageTail(page))) @@ -453,6 +459,18 @@ static inline struct page *compound_head(struct page *page) return page; } +/* + * If we access compound page synchronously such as access to + * allocated page, there is no need to handle tail flag race, so we can + * check tail flag directly without any synchronization primitive. + */ +static inline struct page *compound_head_fast(struct page *page) +{ + if (unlikely(PageTail(page))) + return page->first_page; + return page; +} + /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test @@ -531,7 +549,14 @@ static inline void get_page(struct page *page) static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); - return compound_head(page); + + /* + * We don't need to worry about synchronization of tail flag + * when we call virt_to_head_page() since it is only called for + * already allocated page and this page won't be freed until + * this virt_to_head_page() is finished. So use _fast variant. + */ + return compound_head_fast(page); } /* -- cgit v1.2.3 From c8d78c1823f46519473949d33f0d1d33fe21ea16 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2015 14:09:46 -0800 Subject: mm: replace remap_file_pages() syscall with emulation remap_file_pages(2) was invented to be able efficiently map parts of huge file into limited 32-bit virtual address space such as in database workloads. Nonlinear mappings are pain to support and it seems there's no legitimate use-cases nowadays since 64-bit systems are widely available. Let's drop it and get rid of all these special-cased code. The patch replaces the syscall with emulation which creates new VMA on each remap_file_pages(), unless they it can be merged with an adjacent one. I didn't find *any* real code that uses remap_file_pages(2) to test emulation impact on. I've checked Debian code search and source of all packages in ALT Linux. No real users: libc wrappers, mentions in strace, gdb, valgrind and this kind of stuff. There are few basic tests in LTP for the syscall. They work just fine with emulation. To test performance impact, I've written small test case which demonstrate pretty much worst case scenario: map 4G shmfs file, write to begin of every page pgoff of the page, remap pages in reverse order, read every page. The test creates 1 million of VMAs if emulation is in use, so I had to set vm.max_map_count to 1100000 to avoid -ENOMEM. Before: 23.3 ( +- 4.31% ) seconds After: 43.9 ( +- 0.85% ) seconds Slowdown: 1.88x I believe we can live with that. Test case: #define _GNU_SOURCE #include #include #include #include #define MB (1024UL * 1024) #define SIZE (4096 * MB) int main(int argc, char **argv) { unsigned long *p; long i, pass; for (pass = 0; pass < 10; pass++) { p = mmap(NULL, SIZE, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { perror("mmap"); return -1; } for (i = 0; i < SIZE / 4096; i++) p[i * 4096 / sizeof(*p)] = i; for (i = 0; i < SIZE / 4096; i++) { if (remap_file_pages(p + i * 4096 / sizeof(*p), 4096, 0, (SIZE - 4096 * (i + 1)) >> 12, 0)) { perror("remap_file_pages"); return -1; } } for (i = SIZE / 4096 - 1; i >= 0; i--) assert(p[i * 4096 / sizeof(*p)] == SIZE / 4096 - i - 1); munmap(p, SIZE); } return 0; } [akpm@linux-foundation.org: fix spello] [sasha.levin@oracle.com: initialize populate before usage] [sasha.levin@oracle.com: grab file ref to prevent race while mmaping] Signed-off-by: "Kirill A. Shutemov" Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Dave Jones Cc: Linus Torvalds Cc: Armin Rigo Signed-off-by: Sasha Levin Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 42efe13077b6..60c4996df7f3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2481,8 +2481,12 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, - unsigned long size, pgoff_t pgoff); +static inline int generic_file_remap_pages(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, pgoff_t pgoff) +{ + BUG(); + return 0; +} int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); -- cgit v1.2.3 From 8a5f14a23177061ec11daeaa3d09d0765d785c47 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2015 14:09:49 -0800 Subject: mm: drop support of non-linear mapping from unmap/zap codepath We have remap_file_pages(2) emulation in -mm tree for few release cycles and we plan to have it mainline in v3.20. This patchset removes rest of VM_NONLINEAR infrastructure. Patches 1-8 take care about generic code. They are pretty straight-forward and can be applied without other of patches. Rest patches removes pte_file()-related stuff from architecture-specific code. It usually frees up one bit in non-present pte. I've tried to reuse that bit for swap offset, where I was able to figure out how to do that. For obvious reason I cannot test all that arch-specific code and would like to see acks from maintainers. In total, remap_file_pages(2) required about 1.4K lines of not-so-trivial kernel code. That's too much for functionality nobody uses. Tested-by: Felipe Balbi This patch (of 38): We don't create non-linear mappings anymore. Let's drop code which handles them on unmap/zap. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2c6fd3c5424a..600ef5ed4698 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1146,7 +1146,6 @@ extern void user_shm_unlock(size_t, struct user_struct *); * Parameter block passed down to zap_pte_range in exceptional cases. */ struct zap_details { - struct vm_area_struct *nonlinear_vma; /* Check page->index if set */ struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ -- cgit v1.2.3 From 9b4bdd2ffab9557ac43af7dff02e7dab1c8c58bd Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2015 14:09:51 -0800 Subject: mm: drop support of non-linear mapping from fault codepath We don't create non-linear mappings anymore. Let's drop code which handles them on page fault. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 600ef5ed4698..376e5c325dee 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -206,21 +206,19 @@ extern unsigned int kobjsize(const void *objp); extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ -#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ -#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ -#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ -#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ -#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ -#define FAULT_FLAG_TRIED 0x40 /* second try */ -#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */ +#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */ +#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */ +#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ +#define FAULT_FLAG_TRIED 0x20 /* Second try */ +#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * - * pgoff should be used in favour of virtual_address, if possible. If pgoff - * is used, one may implement ->remap_pages to get nonlinear mapping support. + * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { unsigned int flags; /* FAULT_FLAG_xxx flags */ -- cgit v1.2.3 From d83a08db5ba6072caa658745881f4baa9bad6a08 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2015 14:09:54 -0800 Subject: mm: drop vm_ops->remap_pages and generic_file_remap_pages() stub Nobody uses it anymore. [akpm@linux-foundation.org: fix filemap_xip.c] Signed-off-by: Kirill A. Shutemov Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 6 ------ include/linux/mm.h | 3 --- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 60c4996df7f3..47f557c7ef7e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2481,12 +2481,6 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -static inline int generic_file_remap_pages(struct vm_area_struct *vma, - unsigned long addr, unsigned long size, pgoff_t pgoff) -{ - BUG(); - return 0; -} int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/include/linux/mm.h b/include/linux/mm.h index 376e5c325dee..2ddd9d1d6268 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -285,9 +285,6 @@ struct vm_operations_struct { struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr); #endif - /* called by sys_remap_file_pages() to populate non-linear mapping */ - int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff); }; struct mmu_gather; -- cgit v1.2.3 From 27ba0644ea9dfe6e7693abc85837b60e40583b96 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2015 14:09:59 -0800 Subject: rmap: drop support of non-linear mappings We don't create non-linear mappings anymore. Let's drop code which handles them in rmap. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 +--- include/linux/mm.h | 6 ------ include/linux/mm_types.h | 4 +--- include/linux/rmap.h | 2 -- 4 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 47f557c7ef7e..60acab209701 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -401,7 +401,6 @@ struct address_space { spinlock_t tree_lock; /* and lock protecting it */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root i_mmap; /* tree of private and shared mappings */ - struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ @@ -493,8 +492,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) */ static inline int mapping_mapped(struct address_space *mapping) { - return !RB_EMPTY_ROOT(&mapping->i_mmap) || - !list_empty(&mapping->i_mmap_nonlinear); + return !RB_EMPTY_ROOT(&mapping->i_mmap); } /* diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ddd9d1d6268..18391eec4864 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1796,12 +1796,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, for (vma = vma_interval_tree_iter_first(root, start, last); \ vma; vma = vma_interval_tree_iter_next(vma, start, last)) -static inline void vma_nonlinear_insert(struct vm_area_struct *vma, - struct list_head *list) -{ - list_add_tail(&vma->shared.nonlinear, list); -} - void anon_vma_interval_tree_insert(struct anon_vma_chain *node, struct rb_root *root); void anon_vma_interval_tree_remove(struct anon_vma_chain *node, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6d34aa266a8c..3b1d20fb0848 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -273,15 +273,13 @@ struct vm_area_struct { /* * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree, or - * linkage of vma in the address_space->i_mmap_nonlinear list. + * linkage into the address_space->i_mmap interval tree. */ union { struct { struct rb_node rb; unsigned long rb_subtree_last; } linear; - struct list_head nonlinear; } shared; /* diff --git a/include/linux/rmap.h b/include/linux/rmap.h index d9d7e7e56352..b38f559130d5 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -246,7 +246,6 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); * arg: passed to rmap_one() and invalid_vma() * rmap_one: executed on each vma where page is mapped * done: for checking traversing termination condition - * file_nonlinear: for handling file nonlinear mapping * anon_lock: for getting anon_lock by optimized way rather than default * invalid_vma: for skipping uninterested vma */ @@ -255,7 +254,6 @@ struct rmap_walk_control { int (*rmap_one)(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct page *page); - int (*file_nonlinear)(struct page *, struct address_space *, void *arg); struct anon_vma *(*anon_lock)(struct page *page); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; -- cgit v1.2.3 From ac51b934f3912582d3c897c6c4d09b32ea57b2c7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2015 14:10:02 -0800 Subject: mm: replace vma->sharead.linear with vma->shared After removing vma->shared.nonlinear we have only one member of vma->shared union, which doesn't make much sense. This patch drops the union and move struct vma->shared.linear to vma->shared. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3b1d20fb0848..07c8bd3f7b48 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -275,11 +275,9 @@ struct vm_area_struct { * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. */ - union { - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } linear; + struct { + struct rb_node rb; + unsigned long rb_subtree_last; } shared; /* -- cgit v1.2.3 From 0661a33611fca12570cba48d9344ce68834ee86c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 10 Feb 2015 14:10:04 -0800 Subject: mm: remove rest usage of VM_NONLINEAR and pte_file() One bit in ->vm_flags is unused now! Signed-off-by: Kirill A. Shutemov Cc: Dan Carpenter Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - include/linux/swapops.h | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 18391eec4864..a0da685bdb82 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ -#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ #define VM_ARCH_2 0x02000000 #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6adfb7bfbf44..50cbc876be56 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry) /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { - return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte); + return !pte_none(pte) && !pte_present_nonuma(pte); } #endif @@ -66,7 +66,6 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) { swp_entry_t arch_entry; - BUG_ON(pte_file(pte)); if (pte_swp_soft_dirty(pte)) pte = pte_swp_clear_soft_dirty(pte); arch_entry = __pte_to_swp_entry(pte); @@ -82,7 +81,6 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) swp_entry_t arch_entry; arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); - BUG_ON(pte_file(__swp_entry_to_pte(arch_entry))); return __swp_entry_to_pte(arch_entry); } -- cgit v1.2.3 From 753162cd849c45580fb5aaa7f3597c81e74e391c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 10 Feb 2015 14:11:36 -0800 Subject: mm: hugetlb: fix type of hugetlb_treat_as_movable variable hugetlb_treat_as_movable declared as unsigned long, but proc_dointvec() used for parsing it: static struct ctl_table vm_table[] = { ... { .procname = "hugepages_treat_as_movable", .data = &hugepages_treat_as_movable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, This seems harmless, but it's better to use int type here. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Manfred Spraul Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 431b7fc605c9..7d7856359920 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -86,7 +86,7 @@ void free_huge_page(struct page *page); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); #endif -extern unsigned long hugepages_treat_as_movable; +extern int hugepages_treat_as_movable; extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; -- cgit v1.2.3 From dbf22eb6d8675fc173154d9f1bd1bd0fda53a001 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 10 Feb 2015 14:11:41 -0800 Subject: memcg: zap __memcg_{charge,uncharge}_slab They are simple wrappers around memcg_{charge,uncharge}_kmem, so let's zap them and call these functions directly. Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7c95af8d552c..18ccb2988979 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -403,8 +403,9 @@ void memcg_update_array_size(int num_groups); struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); void __memcg_kmem_put_cache(struct kmem_cache *cachep); -int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order); -void __memcg_uncharge_slab(struct kmem_cache *cachep, int order); +int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, + unsigned long nr_pages); +void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); int __memcg_cleanup_cache_params(struct kmem_cache *s); -- cgit v1.2.3 From 3e0350a36414a73c5c2d1e354f8c0ab4ace1296d Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 10 Feb 2015 14:11:44 -0800 Subject: memcg: zap memcg_name argument of memcg_create_kmem_cache Instead of passing the name of the memory cgroup which the cache is created for in the memcg_name_argument, let's obtain it immediately in memcg_create_kmem_cache. Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9a139b637069..eca9ed303a1b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -117,8 +117,7 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, void (*)(void *)); #ifdef CONFIG_MEMCG_KMEM struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *, - struct kmem_cache *, - const char *); + struct kmem_cache *); #endif void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); -- cgit v1.2.3 From d5b3cf7139b8770af4ed8bb36a1ab9d290ac39e9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 10 Feb 2015 14:11:47 -0800 Subject: memcg: zap memcg_slab_caches and memcg_slab_mutex mem_cgroup->memcg_slab_caches is a list of kmem caches corresponding to the given cgroup. Currently, it is only used on css free in order to destroy all caches corresponding to the memory cgroup being freed. The list is protected by memcg_slab_mutex. The mutex is also used to protect kmem_cache->memcg_params->memcg_caches arrays and synchronizes kmem_cache_destroy vs memcg_unregister_all_caches. However, we can perfectly get on without these two. To destroy all caches corresponding to a memory cgroup, we can walk over the global list of kmem caches, slab_caches, and we can do all the synchronization stuff using the slab_mutex instead of the memcg_slab_mutex. This patch therefore gets rid of the memcg_slab_caches and memcg_slab_mutex. Apart from this nice cleanup, it also: - assures that rcu_barrier() is called once at max when a root cache is destroyed or a memory cgroup is freed, no matter how many caches have SLAB_DESTROY_BY_RCU flag set; - fixes the race between kmem_cache_destroy and kmem_cache_create that exists, because memcg_cleanup_cache_params, which is called from kmem_cache_destroy after checking that kmem_cache->refcount=0, releases the slab_mutex, which gives kmem_cache_create a chance to make an alias to a cache doomed to be destroyed. Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 -- include/linux/slab.h | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 18ccb2988979..fb212e1d700d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -407,8 +407,6 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, unsigned long nr_pages); void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); -int __memcg_cleanup_cache_params(struct kmem_cache *s); - /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. * @gfp: the gfp allocation flags. diff --git a/include/linux/slab.h b/include/linux/slab.h index eca9ed303a1b..2e3b448cfa2d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -116,8 +116,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *)); #ifdef CONFIG_MEMCG_KMEM -struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *, - struct kmem_cache *); +void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *); +void memcg_destroy_kmem_caches(struct mem_cgroup *); #endif void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -490,7 +490,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * Child caches will hold extra metadata needed for its operation. Fields are: * * @memcg: pointer to the memcg this cache belongs to - * @list: list_head for the list of all caches in this memcg * @root_cache: pointer to the global, root cache, this cache was derived from */ struct memcg_cache_params { @@ -502,7 +501,6 @@ struct memcg_cache_params { }; struct { struct mem_cgroup *memcg; - struct list_head list; struct kmem_cache *root_cache; }; }; -- cgit v1.2.3 From e4b294c2d8f73af4cd41ff30638ad0e4769dc56a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:24:46 -0800 Subject: mm: add fields for compound destructor and order into struct page Currently, we use lru.next/lru.prev plus cast to access or set destructor and order of compound page. Let's replace it with explicit fields in struct page. Signed-off-by: Kirill A. Shutemov Acked-by: Jerome Marchand Acked-by: Christoph Lameter Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 ++++----- include/linux/mm_types.h | 8 ++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 65db4aee738a..8dd4fde9d2e5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -627,29 +627,28 @@ int split_free_page(struct page *page); * prototype for that function and accessor functions. * These are _only_ valid on the head of a PG_compound page. */ -typedef void compound_page_dtor(struct page *); static inline void set_compound_page_dtor(struct page *page, compound_page_dtor *dtor) { - page[1].lru.next = (void *)dtor; + page[1].compound_dtor = dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) { - return (compound_page_dtor *)page[1].lru.next; + return page[1].compound_dtor; } static inline int compound_order(struct page *page) { if (!PageHead(page)) return 0; - return (unsigned long)page[1].lru.prev; + return page[1].compound_order; } static inline void set_compound_order(struct page *page, unsigned long order) { - page[1].lru.prev = (void *)order; + page[1].compound_order = order; } #ifdef CONFIG_MMU diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 07c8bd3f7b48..20ff2105b564 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -28,6 +28,8 @@ struct mem_cgroup; IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) +typedef void compound_page_dtor(struct page *); + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -142,6 +144,12 @@ struct page { struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ + /* First tail page of compound page */ + struct { + compound_page_dtor *compound_dtor; + unsigned long compound_order; + }; + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page->ptl */ #endif -- cgit v1.2.3 From 1d148e218a0d0566b1c06f2f45f1436d53b049b2 Mon Sep 17 00:00:00 2001 From: "Wang, Yalin" Date: Wed, 11 Feb 2015 15:24:48 -0800 Subject: mm: add VM_BUG_ON_PAGE() to page_mapcount() Add VM_BUG_ON_PAGE() for slab pages. _mapcount is an union with slab struct in struct page, so we must avoid accessing _mapcount if this page is a slab page. Also remove the unneeded bracket. Signed-off-by: Yalin Wang Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8dd4fde9d2e5..c6bf813a6b3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -484,7 +484,8 @@ static inline void page_mapcount_reset(struct page *page) static inline int page_mapcount(struct page *page) { - return atomic_read(&(page)->_mapcount) + 1; + VM_BUG_ON_PAGE(PageSlab(page), page); + return atomic_read(&page->_mapcount) + 1; } static inline int page_count(struct page *page) -- cgit v1.2.3 From 56873f43abdcd574b25105867a990f067747b2f4 Mon Sep 17 00:00:00 2001 From: "Wang, Yalin" Date: Wed, 11 Feb 2015 15:24:51 -0800 Subject: mm:add KPF_ZERO_PAGE flag for /proc/kpageflags Add KPF_ZERO_PAGE flag for zero_page, so that userspace processes can detect zero_page in /proc/kpageflags, and then do memory analysis more accurately. Signed-off-by: Yalin Wang Acked-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ad9051bab267..f10b20f05159 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -157,6 +157,13 @@ static inline int hpage_nr_pages(struct page *page) extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp); +extern struct page *huge_zero_page; + +static inline bool is_huge_zero_page(struct page *page) +{ + return ACCESS_ONCE(huge_zero_page) == page; +} + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -206,6 +213,11 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str return 0; } +static inline bool is_huge_zero_page(struct page *page) +{ + return false; +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From 93aa7d95248d04b934eb8e89717c7b8d6400bf2b Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 11 Feb 2015 15:24:59 -0800 Subject: swap: remove unused mem_cgroup_uncharge_swapcache declaration The body of this function was removed by commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API"). Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 34e8b60ab973..7067eca501e2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,16 +437,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_MEMCG -extern void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); -#else -static inline void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) -{ -} -#endif - #else /* CONFIG_SWAP */ #define swap_address_space(entry) (NULL) @@ -547,11 +537,6 @@ static inline swp_entry_t get_swap_page(void) return entry; } -static inline void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) -{ -} - #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ -- cgit v1.2.3 From 6de226191d12fce30331ebf024ca3ed24834f0ee Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:25:01 -0800 Subject: mm: memcontrol: track move_lock state internally The complexity of memcg page stat synchronization is currently leaking into the callsites, forcing them to keep track of the move_lock state and the IRQ flags. Simplify the API by tracking it in the memcg. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fb212e1d700d..76b4084b8d08 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -138,12 +138,10 @@ static inline bool mem_cgroup_disabled(void) return false; } -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked, - unsigned long *flags); -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked, - unsigned long *flags); +struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx, int val); +void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) @@ -285,14 +283,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, - bool *locked, unsigned long *flags) +static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) { return NULL; } -static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, - bool *locked, unsigned long *flags) +static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) { } -- cgit v1.2.3 From 44628d9755e249aab9a6e1a17407d2f4278047ee Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 11 Feb 2015 15:25:10 -0800 Subject: mm: fix typo of MIGRATE_RESERVE in comment Found it when I want to jump to the definition of MIGRATE_RESERVE ctags. Signed-off-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2f0856d14b21..b41829701334 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -426,7 +426,7 @@ struct zone { const char *name; /* - * Number of MIGRATE_RESEVE page block. To maintain for just + * Number of MIGRATE_RESERVE page block. To maintain for just * optimization. Protected by zone->lock. */ int nr_migrate_reserve_block; -- cgit v1.2.3 From e66f17ff71772b209eed39de35aaa99ba819c93d Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:22 -0800 Subject: mm/hugetlb: take page table lock in follow_huge_pmd() We have a race condition between move_pages() and freeing hugepages, where move_pages() calls follow_page(FOLL_GET) for hugepages internally and tries to get its refcount without preventing concurrent freeing. This race crashes the kernel, so this patch fixes it by moving FOLL_GET code for hugepages into follow_huge_pmd() with taking the page table lock. This patch intentionally removes page==NULL check after pte_page. This is justified because pte_page() never returns NULL for any architectures or configurations. This patch changes the behavior of follow_huge_pmd() for tail pages and then tail pages can be pinned/returned. So the caller must be changed to properly handle the returned tail pages. We could have a choice to add the similar locking to follow_huge_(addr|pud) for consistency, but it's not necessary because currently these functions don't support FOLL_GET flag, so let's leave it for future development. Here is the reproducer: $ cat movepages.c #include #include #include #define ADDR_INPUT 0x700000000000UL #define HPS 0x200000 #define PS 0x1000 int main(int argc, char *argv[]) { int i; int nr_hp = strtol(argv[1], NULL, 0); int nr_p = nr_hp * HPS / PS; int ret; void **addrs; int *status; int *nodes; pid_t pid; pid = strtol(argv[2], NULL, 0); addrs = malloc(sizeof(char *) * nr_p + 1); status = malloc(sizeof(char *) * nr_p + 1); nodes = malloc(sizeof(char *) * nr_p + 1); while (1) { for (i = 0; i < nr_p; i++) { addrs[i] = (void *)ADDR_INPUT + i * PS; nodes[i] = 1; status[i] = 0; } ret = numa_move_pages(pid, nr_p, addrs, nodes, status, MPOL_MF_MOVE_ALL); if (ret == -1) err("move_pages"); for (i = 0; i < nr_p; i++) { addrs[i] = (void *)ADDR_INPUT + i * PS; nodes[i] = 0; status[i] = 0; } ret = numa_move_pages(pid, nr_p, addrs, nodes, status, MPOL_MF_MOVE_ALL); if (ret == -1) err("move_pages"); } return 0; } $ cat hugepage.c #include #include #include #define ADDR_INPUT 0x700000000000UL #define HPS 0x200000 int main(int argc, char *argv[]) { int nr_hp = strtol(argv[1], NULL, 0); char *p; while (1) { p = mmap((void *)ADDR_INPUT, nr_hp * HPS, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if (p != (void *)ADDR_INPUT) { perror("mmap"); break; } memset(p, 0, nr_hp * HPS); munmap(p, nr_hp * HPS); } } $ sysctl vm.nr_hugepages=40 $ ./hugepage 10 & $ ./movepages 10 $(pgrep -f hugepage) Fixes: e632a938d914 ("mm: migrate: add hugepage migration code to move_pages()") Signed-off-by: Naoya Horiguchi Reported-by: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Cc: [3.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 ++++---- include/linux/swapops.h | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7d7856359920..7b5785032049 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -99,9 +99,9 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write); + pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write); + pud_t *pud, int flags); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pmd); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -133,8 +133,8 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) static inline void hugetlb_show_meminfo(void) { } -#define follow_huge_pmd(mm, addr, pmd, write) NULL -#define follow_huge_pud(mm, addr, pud, write) NULL +#define follow_huge_pmd(mm, addr, pmd, flags) NULL +#define follow_huge_pud(mm, addr, pud, flags) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 #define pud_huge(x) 0 diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 50cbc876be56..831a3168ab35 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -135,6 +135,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry) *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry)); } +extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, @@ -148,6 +150,8 @@ static inline int is_migration_entry(swp_entry_t swp) } #define migration_entry_to_page(swp) NULL static inline void make_migration_entry_read(swp_entry_t *entryp) { } +static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, -- cgit v1.2.3 From 1a6d53a105406d97396c87511afd6f09b4dc8ad2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:25:44 -0800 Subject: mm: reduce try_to_compact_pages parameters Expand the usage of the struct alloc_context introduced in the previous patch also for calling try_to_compact_pages(), to reduce the number of its parameters. Since the function is in different compilation unit, we need to move alloc_context definition in the shared mm/internal.h header. With this change we get simpler code and small savings of code size and stack usage: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-27 (-27) function old new delta __alloc_pages_direct_compact 283 256 -27 add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-13 (-13) function old new delta try_to_compact_pages 582 569 -13 Stack usage of __alloc_pages_direct_compact goes from 24 to none (per scripts/checkstack.pl). Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 3238ffa33f68..f2efda2e6ac6 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -21,6 +21,8 @@ /* Zone lock or lru_lock was contended in async compaction */ #define COMPACT_CONTENDED_LOCK 2 +struct alloc_context; /* in mm/internal.h */ + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, @@ -30,10 +32,9 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); extern int fragmentation_index(struct zone *zone, unsigned int order); -extern unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *mask, - enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx); +extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, + int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order, @@ -101,10 +102,10 @@ static inline bool compaction_restarting(struct zone *zone, int order) } #else -static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *nodemask, - enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx) +static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, + unsigned int order, int alloc_flags, + const struct alloc_context *ac, + enum migrate_mode mode, int *contended) { return COMPACT_CONTINUE; } -- cgit v1.2.3 From 05891fb06517d19ae5357c9dc44e96bbe0300a3c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:25:47 -0800 Subject: mm: microoptimize zonelist operations next_zones_zonelist() returns a zoneref pointer, as well as a zone pointer via extra parameter. Since the latter can be trivially obtained by dereferencing the former, the overhead of the extra parameter is unjustified. This patch thus removes the zone parameter from next_zones_zonelist(). Both callers happen to be in the same header file, so it's simple to add the zoneref dereference inline. We save some bytes of code size. add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-105 (-105) function old new delta nr_free_zone_pages 129 115 -14 __alloc_pages_nodemask 2300 2285 -15 get_page_from_freelist 2652 2576 -76 add/remove: 0/0 grow/shrink: 1/0 up/down: 10/0 (10) function old new delta try_to_compact_pages 569 579 +10 Signed-off-by: Vlastimil Babka Cc: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b41829701334..f279d9c158cd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -970,7 +970,6 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) * @z - The cursor used as a starting point for the search * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with - * @zone - The first suitable zone found is returned via this parameter * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the @@ -980,8 +979,7 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) */ struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, - nodemask_t *nodes, - struct zone **zone); + nodemask_t *nodes); /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist @@ -1000,8 +998,10 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, nodemask_t *nodes, struct zone **zone) { - return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes, - zone); + struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs, + highest_zoneidx, nodes); + *zone = zonelist_zone(z); + return z; } /** @@ -1018,7 +1018,8 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ zone; \ - z = next_zones_zonelist(++z, highidx, nodemask, &zone)) \ + z = next_zones_zonelist(++z, highidx, nodemask), \ + zone = zonelist_zone(z)) \ /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index -- cgit v1.2.3 From 90cbc2508827e1e15dca23361c33cc26dd2b9e99 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 11 Feb 2015 15:25:55 -0800 Subject: vmscan: force scan offline memory cgroups Since commit b2052564e66d ("mm: memcontrol: continue cache reclaim from offlined groups") pages charged to a memory cgroup are not reparented when the cgroup is removed. Instead, they are supposed to be reclaimed in a regular way, along with pages accounted to online memory cgroups. However, an lruvec of an offline memory cgroup will sooner or later get so small that it will be scanned only at low scan priorities (see get_scan_count()). Therefore, if there are enough reclaimable pages in big lruvecs, pages accounted to offline memory cgroups will never be scanned at all, wasting memory. Fix this by unconditionally forcing scanning dead lruvecs from kswapd. [akpm@linux-foundation.org: fix build] Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 76b4084b8d08..353537a5981a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); * For memory reclaim. */ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); +bool mem_cgroup_lruvec_online(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); @@ -266,6 +267,11 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return 1; } +static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) +{ + return true; +} + static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { -- cgit v1.2.3 From 650c5e565492f9092552bfe4d65935196c7d9567 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:03 -0800 Subject: mm: page_counter: pull "-1" handling out of page_counter_memparse() The unified hierarchy interface for memory cgroups will no longer use "-1" to mean maximum possible resource value. In preparation for this, make the string an argument and let the caller supply it. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 955421575d16..17fa4f8de3a6 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -41,7 +41,8 @@ int page_counter_try_charge(struct page_counter *counter, struct page_counter **fail); void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); int page_counter_limit(struct page_counter *counter, unsigned long limit); -int page_counter_memparse(const char *buf, unsigned long *nr_pages); +int page_counter_memparse(const char *buf, const char *max, + unsigned long *nr_pages); static inline void page_counter_reset_watermark(struct page_counter *counter) { -- cgit v1.2.3 From 241994ed8649f7300667be8b13a9e04ae04e05a1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:06 -0800 Subject: mm: memcontrol: default hierarchy interface for memory Introduce the basic control files to account, partition, and limit memory using cgroups in default hierarchy mode. This interface versioning allows us to address fundamental design issues in the existing memory cgroup interface, further explained below. The old interface will be maintained indefinitely, but a clearer model and improved workload performance should encourage existing users to switch over to the new one eventually. The control files are thus: - memory.current shows the current consumption of the cgroup and its descendants, in bytes. - memory.low configures the lower end of the cgroup's expected memory consumption range. The kernel considers memory below that boundary to be a reserve - the minimum that the workload needs in order to make forward progress - and generally avoids reclaiming it, unless there is an imminent risk of entering an OOM situation. - memory.high configures the upper end of the cgroup's expected memory consumption range. A cgroup whose consumption grows beyond this threshold is forced into direct reclaim, to work off the excess and to throttle new allocations heavily, but is generally allowed to continue and the OOM killer is not invoked. - memory.max configures the hard maximum amount of memory that the cgroup is allowed to consume before the OOM killer is invoked. - memory.events shows event counters that indicate how often the cgroup was reclaimed while below memory.low, how often it was forced to reclaim excess beyond memory.high, how often it hit memory.max, and how often it entered OOM due to memory.max. This allows users to identify configuration problems when observing a degradation in workload performance. An overcommitted system will have an increased rate of low boundary breaches, whereas increased rates of high limit breaches, maximum hits, or even OOM situations will indicate internally overcommitted cgroups. For existing users of memory cgroups, the following deviations from the current interface are worth pointing out and explaining: - The original lower boundary, the soft limit, is defined as a limit that is per default unset. As a result, the set of cgroups that global reclaim prefers is opt-in, rather than opt-out. The costs for optimizing these mostly negative lookups are so high that the implementation, despite its enormous size, does not even provide the basic desirable behavior. First off, the soft limit has no hierarchical meaning. All configured groups are organized in a global rbtree and treated like equal peers, regardless where they are located in the hierarchy. This makes subtree delegation impossible. Second, the soft limit reclaim pass is so aggressive that it not just introduces high allocation latencies into the system, but also impacts system performance due to overreclaim, to the point where the feature becomes self-defeating. The memory.low boundary on the other hand is a top-down allocated reserve. A cgroup enjoys reclaim protection when it and all its ancestors are below their low boundaries, which makes delegation of subtrees possible. Secondly, new cgroups have no reserve per default and in the common case most cgroups are eligible for the preferred reclaim pass. This allows the new low boundary to be efficiently implemented with just a minor addition to the generic reclaim code, without the need for out-of-band data structures and reclaim passes. Because the generic reclaim code considers all cgroups except for the ones running low in the preferred first reclaim pass, overreclaim of individual groups is eliminated as well, resulting in much better overall workload performance. - The original high boundary, the hard limit, is defined as a strict limit that can not budge, even if the OOM killer has to be called. But this generally goes against the goal of making the most out of the available memory. The memory consumption of workloads varies during runtime, and that requires users to overcommit. But doing that with a strict upper limit requires either a fairly accurate prediction of the working set size or adding slack to the limit. Since working set size estimation is hard and error prone, and getting it wrong results in OOM kills, most users tend to err on the side of a looser limit and end up wasting precious resources. The memory.high boundary on the other hand can be set much more conservatively. When hit, it throttles allocations by forcing them into direct reclaim to work off the excess, but it never invokes the OOM killer. As a result, a high boundary that is chosen too aggressively will not terminate the processes, but instead it will lead to gradual performance degradation. The user can monitor this and make corrections until the minimal memory footprint that still gives acceptable performance is found. In extreme cases, with many concurrent allocations and a complete breakdown of reclaim progress within the group, the high boundary can be exceeded. But even then it's mostly better to satisfy the allocation from the slack available in other groups or the rest of the system than killing the group. Otherwise, memory.max is there to limit this type of spillover and ultimately contain buggy or even malicious applications. - The original control file names are unwieldy and inconsistent in many different ways. For example, the upper boundary hit count is exported in the memory.failcnt file, but an OOM event count has to be manually counted by listening to memory.oom_control events, and lower boundary / soft limit events have to be counted by first setting a threshold for that value and then counting those events. Also, usage and limit files encode their units in the filename. That makes the filenames very long, even though this is not information that a user needs to be reminded of every time they type out those names. To address these naming issues, as well as to signal clearly that the new interface carries a new configuration model, the naming conventions in it necessarily differ from the old interface. - The original limit files indicate the state of an unset limit with a very high number, and a configured limit can be unset by echoing -1 into those files. But that very high number is implementation and architecture dependent and not very descriptive. And while -1 can be understood as an underflow into the highest possible value, -2 or -10M etc. do not work, so it's not inconsistent. memory.low, memory.high, and memory.max will use the string "infinity" to indicate and set the highest possible value. [akpm@linux-foundation.org: use seq_puts() for basic strings] Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 353537a5981a..6cfd934c7c9b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -52,7 +52,27 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; +enum mem_cgroup_events_index { + MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ + MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ + MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ + MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ + MEM_CGROUP_EVENTS_NSTATS, + /* default hierarchy events */ + MEMCG_LOW = MEM_CGROUP_EVENTS_NSTATS, + MEMCG_HIGH, + MEMCG_MAX, + MEMCG_OOM, + MEMCG_NR_EVENTS, +}; + #ifdef CONFIG_MEMCG +void mem_cgroup_events(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx, + unsigned int nr); + +bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); + int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp); void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, @@ -175,6 +195,18 @@ void mem_cgroup_split_huge_fixup(struct page *head); #else /* CONFIG_MEMCG */ struct mem_cgroup; +static inline void mem_cgroup_events(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx, + unsigned int nr) +{ +} + +static inline bool mem_cgroup_low(struct mem_cgroup *root, + struct mem_cgroup *memcg) +{ + return false; +} + static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp) -- cgit v1.2.3 From 49550b605587924b3336386caae53200c68969d3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:26:12 -0800 Subject: oom: add helpers for setting and clearing TIF_MEMDIE This patchset addresses a race which was described in the changelog for 5695be142e20 ("OOM, PM: OOM killed task shouldn't escape PM suspend"): : PM freezer relies on having all tasks frozen by the time devices are : getting frozen so that no task will touch them while they are getting : frozen. But OOM killer is allowed to kill an already frozen task in order : to handle OOM situtation. In order to protect from late wake ups OOM : killer is disabled after all tasks are frozen. This, however, still keeps : a window open when a killed task didn't manage to die by the time : freeze_processes finishes. The original patch hasn't closed the race window completely because that would require a more complex solution as it can be seen by this patchset. The primary motivation was to close the race condition between OOM killer and PM freezer _completely_. As Tejun pointed out, even though the race condition is unlikely the harder it would be to debug weird bugs deep in the PM freezer when the debugging options are reduced considerably. I can only speculate what might happen when a task is still runnable unexpectedly. On a plus side and as a side effect the oom enable/disable has a better (full barrier) semantic without polluting hot paths. I have tested the series in KVM with 100M RAM: - many small tasks (20M anon mmap) which are triggering OOM continually - s2ram which resumes automatically is triggered in a loop echo processors > /sys/power/pm_test while true do echo mem > /sys/power/state sleep 1s done - simple module which allocates and frees 20M in 8K chunks. If it sees freezing(current) then it tries another round of allocation before calling try_to_freeze - debugging messages of PM stages and OOM killer enable/disable/fail added and unmark_oom_victim is delayed by 1s after it clears TIF_MEMDIE and before it wakes up waiters. - rebased on top of the current mmotm which means some necessary updates in mm/oom_kill.c. mark_tsk_oom_victim is now called under task_lock but I think this should be OK because __thaw_task shouldn't interfere with any locking down wake_up_process. Oleg? As expected there are no OOM killed tasks after oom is disabled and allocations requested by the kernel thread are failing after all the tasks are frozen and OOM disabled. I wasn't able to catch a race where oom_killer_disable would really have to wait but I kinda expected the race is really unlikely. [ 242.609330] Killed process 2992 (mem_eater) total-vm:24412kB, anon-rss:2164kB, file-rss:4kB [ 243.628071] Unmarking 2992 OOM victim. oom_victims: 1 [ 243.636072] (elapsed 2.837 seconds) done. [ 243.641985] Trying to disable OOM killer [ 243.643032] Waiting for concurent OOM victims [ 243.644342] OOM killer disabled [ 243.645447] Freezing remaining freezable tasks ... (elapsed 0.005 seconds) done. [ 243.652983] Suspending console(s) (use no_console_suspend to debug) [ 243.903299] kmem_eater: page allocation failure: order:1, mode:0x204010 [...] [ 243.992600] PM: suspend of devices complete after 336.667 msecs [ 243.993264] PM: late suspend of devices complete after 0.660 msecs [ 243.994713] PM: noirq suspend of devices complete after 1.446 msecs [ 243.994717] ACPI: Preparing to enter system sleep state S3 [ 243.994795] PM: Saving platform NVS memory [ 243.994796] Disabling non-boot CPUs ... The first 2 patches are simple cleanups for OOM. They should go in regardless the rest IMO. Patches 3 and 4 are trivial printk -> pr_info conversion and they should go in ditto. The main patch is the last one and I would appreciate acks from Tejun and Rafael. I think the OOM part should be OK (except for __thaw_task vs. task_lock where a look from Oleg would appreciated) but I am not so sure I haven't screwed anything in the freezer code. I have found several surprises there. This patch (of 5): This patch is just a preparatory and it doesn't introduce any functional change. Note: I am utterly unhappy about lowmemory killer abusing TIF_MEMDIE just to wait for the oom victim and to prevent from new killing. This is just a side effect of the flag. The primary meaning is to give the oom victim access to the memory reserves and that shouldn't be necessary here. Signed-off-by: Michal Hocko Cc: Tejun Heo Cc: David Rientjes Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Cong Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 76200984d1e2..b42b80f88c3a 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -47,6 +47,10 @@ static inline bool oom_task_origin(const struct task_struct *p) return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN); } +extern void mark_tsk_oom_victim(struct task_struct *tsk); + +extern void unmark_oom_victim(void); + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); -- cgit v1.2.3 From c32b3cbe0d067a9cfae85aa70ba1e97ceba0ced7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:26:24 -0800 Subject: oom, PM: make OOM detection in the freezer path raceless Commit 5695be142e20 ("OOM, PM: OOM killed task shouldn't escape PM suspend") has left a race window when OOM killer manages to note_oom_kill after freeze_processes checks the counter. The race window is quite small and really unlikely and partial solution deemed sufficient at the time of submission. Tejun wasn't happy about this partial solution though and insisted on a full solution. That requires the full OOM and freezer's task freezing exclusion, though. This is done by this patch which introduces oom_sem RW lock and turns oom_killer_disable() into a full OOM barrier. oom_killer_disabled check is moved from the allocation path to the OOM level and we take oom_sem for reading for both the check and the whole OOM invocation. oom_killer_disable() takes oom_sem for writing so it waits for all currently running OOM killer invocations. Then it disable all the further OOMs by setting oom_killer_disabled and checks for any oom victims. Victims are counted via mark_tsk_oom_victim resp. unmark_oom_victim. The last victim wakes up all waiters enqueued by oom_killer_disable(). Therefore this function acts as the full OOM barrier. The page fault path is covered now as well although it was assumed to be safe before. As per Tejun, "We used to have freezing points deep in file system code which may be reacheable from page fault." so it would be better and more robust to not rely on freezing points here. Same applies to the memcg OOM killer. out_of_memory tells the caller whether the OOM was allowed to trigger and the callers are supposed to handle the situation. The page allocation path simply fails the allocation same as before. The page fault path will retry the fault (more on that later) and Sysrq OOM trigger will simply complain to the log. Normally there wouldn't be any unfrozen user tasks after try_to_freeze_tasks so the function will not block. But if there was an OOM killer racing with try_to_freeze_tasks and the OOM victim didn't finish yet then we have to wait for it. This should complete in a finite time, though, because - the victim cannot loop in the page fault handler (it would die on the way out from the exception) - it cannot loop in the page allocator because all the further allocation would fail and __GFP_NOFAIL allocations are not acceptable at this stage - it shouldn't be blocked on any locks held by frozen tasks (try_to_freeze expects lockless context) and kernel threads and work queues are not frozen yet Signed-off-by: Michal Hocko Suggested-by: Tejun Heo Cc: David Rientjes Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Cong Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index b42b80f88c3a..d5771bed59c9 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -72,22 +72,14 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill); -extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, +extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *mask, bool force_kill); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); extern bool oom_killer_disabled; - -static inline void oom_killer_disable(void) -{ - oom_killer_disabled = true; -} - -static inline void oom_killer_enable(void) -{ - oom_killer_disabled = false; -} +extern bool oom_killer_disable(void); +extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); -- cgit v1.2.3 From dc6c9a35b66b520cf67e05d8ca60ebecad3b0479 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:26:50 -0800 Subject: mm: account pmd page tables to the process Dave noticed that unprivileged process can allocate significant amount of memory -- >500 MiB on x86_64 -- and stay unnoticed by oom-killer and memory cgroup. The trick is to allocate a lot of PMD page tables. Linux kernel doesn't account PMD tables to the process, only PTE. The use-cases below use few tricks to allocate a lot of PMD page tables while keeping VmRSS and VmPTE low. oom_score for the process will be 0. #include #include #include #include #include #include #define PUD_SIZE (1UL << 30) #define PMD_SIZE (1UL << 21) #define NR_PUD 130000 int main(void) { char *addr = NULL; unsigned long i; prctl(PR_SET_THP_DISABLE); for (i = 0; i < NR_PUD ; i++) { addr = mmap(addr + PUD_SIZE, PUD_SIZE, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); break; } *addr = 'x'; munmap(addr, PMD_SIZE); mmap(addr, PMD_SIZE, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0); if (addr == MAP_FAILED) perror("re-mmap"), exit(1); } printf("PID %d consumed %lu KiB in PMD page tables\n", getpid(), i * 4096 >> 10); return pause(); } The patch addresses the issue by account PMD tables to the process the same way we account PTE. The main place where PMD tables is accounted is __pmd_alloc() and free_pmd_range(). But there're few corner cases: - HugeTLB can share PMD page tables. The patch handles by accounting the table to all processes who share it. - x86 PAE pre-allocates few PMD tables on fork. - Architectures with FIRST_USER_ADDRESS > 0. We need to adjust sanity check on exit(2). Accounting only happens on configuration where PMD page table's level is present (PMD is not folded). As with nr_ptes we use per-mm counter. The counter value is used to calculate baseline for badness score by oom-killer. Signed-off-by: Kirill A. Shutemov Reported-by: Dave Hansen Cc: Hugh Dickins Reviewed-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: David Rientjes Tested-by: Sedat Dilek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 ++++++++++++++++++++++++ include/linux/mm_types.h | 3 ++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c6bf813a6b3d..644990b83cda 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1438,8 +1438,32 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, { return 0; } + +static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +{ + return 0; +} + +static inline void mm_inc_nr_pmds(struct mm_struct *mm) {} +static inline void mm_dec_nr_pmds(struct mm_struct *mm) {} + #else int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); + +static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +{ + return atomic_long_read(&mm->nr_pmds); +} + +static inline void mm_inc_nr_pmds(struct mm_struct *mm) +{ + atomic_long_inc(&mm->nr_pmds); +} + +static inline void mm_dec_nr_pmds(struct mm_struct *mm) +{ + atomic_long_dec(&mm->nr_pmds); +} #endif int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 20ff2105b564..199a03aab8dc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -363,7 +363,8 @@ struct mm_struct { pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ - atomic_long_t nr_ptes; /* Page table pages */ + atomic_long_t nr_ptes; /* PTE page table pages */ + atomic_long_t nr_pmds; /* PMD page table pages */ int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some counters */ -- cgit v1.2.3 From 16c4a097a035c01809aa0c0abd458ca1fe4ff3d0 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:27:01 -0800 Subject: mm/compaction: enhance tracepoint output for compaction begin/end We now have tracepoint for begin event of compaction and it prints start position of both scanners, but, tracepoint for end event of compaction doesn't print finish position of both scanners. It'd be also useful to know finish position of both scanners so this patch add it. It will help to find odd behavior or problem on compaction internal logic. And mode is added to both begin/end tracepoint output, since according to mode, compaction behavior is quite different. And lastly, status format is changed to string rather than status number for readability. [akpm@linux-foundation.org: fix sparse warning] Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index f2efda2e6ac6..db64cae06530 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -12,6 +12,7 @@ #define COMPACT_PARTIAL 3 /* The full zone was compacted */ #define COMPACT_COMPLETE 4 +/* When adding new state, please change compaction_status_string, too */ /* Used to signal whether compaction detected need_sched() or lock contention */ /* No contention detected */ -- cgit v1.2.3 From 837d026d560c5ef26abeca0441713d82e4e82cad Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:27:06 -0800 Subject: mm/compaction: more trace to understand when/why compaction start/finish It is not well analyzed that when/why compaction start/finish or not. With these new tracepoints, we can know much more about start/finish reason of compaction. I can find following bug with these tracepoint. http://www.spinics.net/lists/linux-mm/msg81582.html Signed-off-by: Joonsoo Kim Cc: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index db64cae06530..501d7513aac1 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -12,6 +12,9 @@ #define COMPACT_PARTIAL 3 /* The full zone was compacted */ #define COMPACT_COMPLETE 4 +/* For more detailed tracepoint output */ +#define COMPACT_NO_SUITABLE_PAGE 5 +#define COMPACT_NOT_SUITABLE_ZONE 6 /* When adding new state, please change compaction_status_string, too */ /* Used to signal whether compaction detected need_sched() or lock contention */ -- cgit v1.2.3 From 24e2716f63e613cf15d3beba3faa0711bcacc427 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:27:09 -0800 Subject: mm/compaction: add tracepoint to observe behaviour of compaction defer Compaction deferring logic is heavy hammer that block the way to the compaction. It doesn't consider overall system state, so it could prevent user from doing compaction falsely. In other words, even if system has enough range of memory to compact, compaction would be skipped due to compaction deferring logic. This patch add new tracepoint to understand work of deferring logic. This will also help to check compaction success and fail. Signed-off-by: Joonsoo Kim Cc: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 65 ++++------------------------------------------ 1 file changed, 5 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 501d7513aac1..a014559e4a49 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -44,66 +44,11 @@ extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order, int alloc_flags, int classzone_idx); -/* Do not skip compaction more than 64 times */ -#define COMPACT_MAX_DEFER_SHIFT 6 - -/* - * Compaction is deferred when compaction fails to result in a page - * allocation success. 1 << compact_defer_limit compactions are skipped up - * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT - */ -static inline void defer_compaction(struct zone *zone, int order) -{ - zone->compact_considered = 0; - zone->compact_defer_shift++; - - if (order < zone->compact_order_failed) - zone->compact_order_failed = order; - - if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT) - zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT; -} - -/* Returns true if compaction should be skipped this time */ -static inline bool compaction_deferred(struct zone *zone, int order) -{ - unsigned long defer_limit = 1UL << zone->compact_defer_shift; - - if (order < zone->compact_order_failed) - return false; - - /* Avoid possible overflow */ - if (++zone->compact_considered > defer_limit) - zone->compact_considered = defer_limit; - - return zone->compact_considered < defer_limit; -} - -/* - * Update defer tracking counters after successful compaction of given order, - * which means an allocation either succeeded (alloc_success == true) or is - * expected to succeed. - */ -static inline void compaction_defer_reset(struct zone *zone, int order, - bool alloc_success) -{ - if (alloc_success) { - zone->compact_considered = 0; - zone->compact_defer_shift = 0; - } - if (order >= zone->compact_order_failed) - zone->compact_order_failed = order + 1; -} - -/* Returns true if restarting compaction after many failures */ -static inline bool compaction_restarting(struct zone *zone, int order) -{ - if (order < zone->compact_order_failed) - return false; - - return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT && - zone->compact_considered >= 1UL << zone->compact_defer_shift; -} +extern void defer_compaction(struct zone *zone, int order); +extern bool compaction_deferred(struct zone *zone, int order); +extern void compaction_defer_reset(struct zone *zone, int order, + bool alloc_success); +extern bool compaction_restarting(struct zone *zone, int order); #else static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, -- cgit v1.2.3 From 077fcf116c8c2bd7ee9487b645aa3b50368db7e1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 11 Feb 2015 15:27:12 -0800 Subject: mm/thp: allocate transparent hugepages on local node This make sure that we try to allocate hugepages from local node if allowed by mempolicy. If we can't, we fallback to small page allocation based on mempolicy. This is based on the observation that allocating pages on local node is more beneficial than allocating hugepages on remote node. With this patch applied we may find transparent huge page allocation failures if the current node doesn't have enough freee hugepages. Before this patch such failures result in us retrying the allocation on other nodes in the numa node mask. [akpm@linux-foundation.org: fix comment, add CONFIG_TRANSPARENT_HUGEPAGE dependency] Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: David Rientjes Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b840e3b2770d..60110e06419d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -335,11 +335,15 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, int node); +extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma, + unsigned long addr, int order); #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) #define alloc_pages_vma(gfp_mask, order, vma, addr, node) \ alloc_pages(gfp_mask, order) +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ + alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ -- cgit v1.2.3 From be97a41b291e495d6cb767b3ee0f84ed05804892 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:27:15 -0800 Subject: mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma The previous commit ("mm/thp: Allocate transparent hugepages on local node") introduced alloc_hugepage_vma() to mm/mempolicy.c to perform a special policy for THP allocations. The function has the same interface as alloc_pages_vma(), shares a lot of boilerplate code and a long comment. This patch merges the hugepage special case into alloc_pages_vma. The extra if condition should be cheap enough price to pay. We also prevent a (however unlikely) race with parallel mems_allowed update, which could make hugepage allocation restart only within the fallback call to alloc_hugepage_vma() and not reconsider the special rule in alloc_hugepage_vma(). Also by making sure mpol_cond_put(pol) is always called before actual allocation attempt, we can use a single exit path within the function. Also update the comment for missing node parameter and obsolete reference to mm_sem. Signed-off-by: Vlastimil Babka Cc: Aneesh Kumar K.V Cc: Kirill A. Shutemov Cc: Vlastimil Babka Cc: David Rientjes Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 60110e06419d..51bd1e72a917 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -334,22 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node); -extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma, - unsigned long addr, int order); + int node, bool hugepage); +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ + alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node) + alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order); extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, -- cgit v1.2.3 From f0818f472d8d527a96ec9cc2c3a56223497f9dd3 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:17 -0800 Subject: mm: gup: add get_user_pages_locked and get_user_pages_unlocked FAULT_FOLL_ALLOW_RETRY allows the page fault to drop the mmap_sem for reading to reduce the mmap_sem contention (for writing), like while waiting for I/O completion. The problem is that right now practically no get_user_pages call uses FAULT_FOLL_ALLOW_RETRY, so we're not leveraging that nifty feature. Andres fixed it for the KVM page fault. However get_user_pages_fast remains uncovered, and 99% of other get_user_pages aren't using it either (the only exception being FOLL_NOWAIT in KVM which is really nonblocking and in fact it doesn't even release the mmap_sem). So this patchsets extends the optimization Andres did in the KVM page fault to the whole kernel. It makes most important places (including gup_fast) to use FAULT_FOLL_ALLOW_RETRY to reduce the mmap_sem hold times during I/O. The only few places that remains uncovered are drivers like v4l and other exceptions that tends to work on their own memory and they're not working on random user memory (for example like O_DIRECT that uses gup_fast and is fully covered by this patch). A follow up patch should probably also add a printk_once warning to get_user_pages that should go obsolete and be phased out eventually. The "vmas" parameter of get_user_pages makes it fundamentally incompatible with FAULT_FOLL_ALLOW_RETRY (vmas array becomes meaningless the moment the mmap_sem is released). While this is just an optimization, this becomes an absolute requirement for the userfaultfd feature http://lwn.net/Articles/615086/ . The userfaultfd allows to block the page fault, and in order to do so I need to drop the mmap_sem first. So this patch also ensures that all memory where userfaultfd could be registered by KVM, the very first fault (no matter if it is a regular page fault, or a get_user_pages) always has FAULT_FOLL_ALLOW_RETRY set. Then the userfaultfd blocks and it is waken only when the pagetable is already mapped. The second fault attempt after the wakeup doesn't need FAULT_FOLL_ALLOW_RETRY, so it's ok to retry without it. This patch (of 5): We can leverage the VM_FAULT_RETRY functionality in the page fault paths better by using either get_user_pages_locked or get_user_pages_unlocked. The former allows conversion of get_user_pages invocations that will have to pass a "&locked" parameter to know if the mmap_sem was dropped during the call. Example from: down_read(&mm->mmap_sem); do_something() get_user_pages(tsk, mm, ..., pages, NULL); up_read(&mm->mmap_sem); to: int locked = 1; down_read(&mm->mmap_sem); do_something() get_user_pages_locked(tsk, mm, ..., pages, &locked); if (locked) up_read(&mm->mmap_sem); The latter is suitable only as a drop in replacement of the form: down_read(&mm->mmap_sem); get_user_pages(tsk, mm, ..., pages, NULL); up_read(&mm->mmap_sem); into: get_user_pages_unlocked(tsk, mm, ..., pages); Where tsk, mm, the intermediate "..." paramters and "pages" can be any value as before. Just the last parameter of get_user_pages (vmas) must be NULL for get_user_pages_locked|unlocked to be usable (the latter original form wouldn't have been safe anyway if vmas wasn't null, for the former we just make it explicit by dropping the parameter). If vmas is not NULL these two methods cannot be used. Signed-off-by: Andrea Arcangeli Reviewed-by: Andres Lagar-Cavilla Reviewed-by: Peter Feiner Reviewed-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 644990b83cda..fc499e675475 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1261,6 +1261,13 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); +long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked); +long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct kvec; -- cgit v1.2.3 From 0fd71a56f41d4ffabeda1dae9ff5ed4f34d4e935 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:20 -0800 Subject: mm: gup: add __get_user_pages_unlocked to customize gup_flags Some callers (like KVM) may want to set the gup_flags like FOLL_HWPOSION to get a proper -EHWPOSION retval instead of -EFAULT to take a more appropriate action if get_user_pages runs into a memory failure. Signed-off-by: Andrea Arcangeli Reviewed-by: Kirill A. Shutemov Cc: Andres Lagar-Cavilla Cc: Peter Feiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fc499e675475..3696b3bd1d7e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1265,6 +1265,10 @@ long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); +long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + unsigned int gup_flags); long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); -- cgit v1.2.3 From 0664e57ff0c68cbca012a45a38288fa277eb6795 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:28 -0800 Subject: mm: gup: kvm use get_user_pages_unlocked Use the more generic get_user_pages_unlocked which has the additional benefit of passing FAULT_FLAG_ALLOW_RETRY at the very first page fault (which allows the first page fault in an unmapped area to be always able to block indefinitely by being allowed to release the mmap_sem). Signed-off-by: Andrea Arcangeli Reviewed-by: Andres Lagar-Cavilla Reviewed-by: Kirill A. Shutemov Cc: Peter Feiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kvm_host.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 26f106022c88..d189ee098aa2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -200,17 +200,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -/* - * Carry out a gup that requires IO. Allow the mm to relinquish the mmap - * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL - * controls whether we retry the gup one more time to completion in that case. - * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp - * handler. - */ -int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, bool write_fault, - struct page **pagep); - enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, -- cgit v1.2.3 From 0b1fbfe50006c41014cc25660c0e735d21c34939 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:34 -0800 Subject: mm/pagewalk: remove pgd_entry() and pud_entry() Currently no user of page table walker sets ->pgd_entry() or ->pud_entry(), so checking their existence in each loop is just wasting CPU cycle. So let's remove it to reduce overhead. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3696b3bd1d7e..f6106d3f3dab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1164,8 +1164,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, /** * mm_walk - callbacks for walk_page_range - * @pgd_entry: if set, called for each non-empty PGD (top-level) entry - * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to @@ -1179,10 +1177,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * (see walk_page_range for more details) */ struct mm_walk { - int (*pgd_entry)(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk); - int (*pud_entry)(pud_t *pud, unsigned long addr, - unsigned long next, struct mm_walk *walk); int (*pmd_entry)(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk); int (*pte_entry)(pte_t *pte, unsigned long addr, -- cgit v1.2.3 From fafaa4264eba49fd10695c193a82760558d093f4 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:37 -0800 Subject: pagewalk: improve vma handling Current implementation of page table walker has a fundamental problem in vma handling, which started when we tried to handle vma(VM_HUGETLB). Because it's done in pgd loop, considering vma boundary makes code complicated and bug-prone. From the users viewpoint, some user checks some vma-related condition to determine whether the user really does page walk over the vma. In order to solve these, this patch moves vma check outside pgd loop and introduce a new callback ->test_walk(). Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f6106d3f3dab..3891a368e5e0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1171,10 +1171,16 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * @pte_entry: if set, called for each non-empty PTE (4th-level) entry * @pte_hole: if set, called for each hole at all levels * @hugetlb_entry: if set, called for each hugetlb entry - * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry - * is used. + * @test_walk: caller specific callback function to determine whether + * we walk over the current vma or not. A positive returned + * value means "do page table walk over the current vma," + * and a negative one means "abort current page table walk + * right now." 0 means "skip the current vma." + * @mm: mm_struct representing the target process of page table walk + * @vma: vma currently walked (NULL if walking outside vmas) + * @private: private data for callbacks' usage * - * (see walk_page_range for more details) + * (see the comment on walk_page_range() for more details) */ struct mm_walk { int (*pmd_entry)(pmd_t *pmd, unsigned long addr, @@ -1186,7 +1192,10 @@ struct mm_walk { int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long next, struct mm_walk *walk); + int (*test_walk)(unsigned long addr, unsigned long next, + struct mm_walk *walk); struct mm_struct *mm; + struct vm_area_struct *vma; void *private; }; -- cgit v1.2.3 From 900fc5f197b05253ae9433fb9a066c3f37d08f69 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:40 -0800 Subject: pagewalk: add walk_page_vma() Introduce walk_page_vma(), which is useful for the callers which want to walk over a given vma. It's used by later patches. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3891a368e5e0..a4d24f3c5430 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1201,6 +1201,7 @@ struct mm_walk { int walk_page_range(unsigned long addr, unsigned long end, struct mm_walk *walk); +int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk); void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, -- cgit v1.2.3 From 94f759d62b2c6a9d124b0622077b1ddcfac43fb5 Mon Sep 17 00:00:00 2001 From: Sergei Rogachev Date: Wed, 11 Feb 2015 15:28:34 -0800 Subject: mm/page_owner.c: remove unnecessary stack_trace field Page owner uses the page_ext structure to keep meta-information for every page in the system. The structure also contains a field of type 'struct stack_trace', page owner uses this field during invocation of the function save_stack_trace. It is easy to notice that keeping a copy of this structure for every page in the system is very inefficiently in terms of memory. The patch removes this unnecessary field of page_ext and forces page owner to use a stack_trace structure allocated on the stack. [akpm@linux-foundation.org: use struct initializers] Signed-off-by: Sergei Rogachev Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index d2a2c84c72d0..c42981cd99aa 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -40,7 +40,7 @@ struct page_ext { #ifdef CONFIG_PAGE_OWNER unsigned int order; gfp_t gfp_mask; - struct stack_trace trace; + unsigned int nr_entries; unsigned long trace_entries[8]; #endif }; -- cgit v1.2.3